diff --git a/go.mod b/go.mod
index dc73581..e4c34f9 100644
--- a/go.mod
+++ b/go.mod
@@ -1,18 +1,20 @@
module taldl
-go 1.14
+go 1.16
require (
- github.com/PuerkitoBio/goquery v1.5.1 // indirect
- github.com/antchfx/htmlquery v1.2.2 // indirect
- github.com/antchfx/xmlquery v1.2.3 // indirect
- github.com/antchfx/xpath v1.1.4 // indirect
+ github.com/PuerkitoBio/goquery v1.7.0 // indirect
+ github.com/antchfx/htmlquery v1.2.3 // indirect
+ github.com/antchfx/xmlquery v1.3.6 // indirect
github.com/gobwas/glob v0.2.3 // indirect
github.com/gocolly/colly v1.2.0
- github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
+ github.com/golang/protobuf v1.5.2 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
- github.com/temoto/robotstxt v1.1.1 // indirect
- golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect
- google.golang.org/appengine v1.6.5 // indirect
+ github.com/schollz/progressbar/v3 v3.8.2
+ github.com/stretchr/testify v1.7.0 // indirect
+ github.com/temoto/robotstxt v1.1.2 // indirect
+ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+ google.golang.org/appengine v1.6.7 // indirect
+ gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)
diff --git a/go.sum b/go.sum
index 3b8f04c..fd7ff84 100644
--- a/go.sum
+++ b/go.sum
@@ -1,42 +1,88 @@
-github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
-github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
+github.com/PuerkitoBio/goquery v1.7.0 h1:O5SP3b9JWqMSVMG69zMfj577zwkSNpxrFf7ybS74eiw=
+github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
-github.com/antchfx/htmlquery v1.2.2 h1:exe4hUStBqXdRZ+9nB7EYA+W2zfIHIq3rRFpChh+VSk=
-github.com/antchfx/htmlquery v1.2.2/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
-github.com/antchfx/xmlquery v1.2.3 h1:++irmxT+Pkn55FGtSTkUTHarZ6E0b1yyR+UiPZRA+eY=
-github.com/antchfx/xmlquery v1.2.3/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
-github.com/antchfx/xpath v1.1.4 h1:naPIpjBGeT3eX0Vw7E8iyHsY8FGt6EbGdkcd8EZCo+g=
-github.com/antchfx/xpath v1.1.4/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
+github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
+github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
+github.com/antchfx/xmlquery v1.3.6 h1:kaEVzH1mNo/2AJZrhZjAaAUTy2Nn2zxGfYYU8jWfXOo=
+github.com/antchfx/xmlquery v1.3.6/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
+github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
+github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
+github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
-github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
+github.com/mattn/go-isatty v0.0.13/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
+github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
+github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
+github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
+github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
+github.com/schollz/progressbar/v3 v3.8.2 h1:2kZJwZCpb+E/V79kGO7daeq+hUwUJW0A5QD1Wv455dA=
+github.com/schollz/progressbar/v3 v3.8.2/go.mod h1:9KHLdyuXczIsyStQwzvW8xiELskmX7fQMaZdN23nAv8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
-github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
+github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
+github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e h1:gsTQYXdTw2Gq7RBsWvlQ91b+aEQ6bXFUngBGuR8sPpI=
+golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
-golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 h1:qWPm9rbaAMKs8Bq/9LRpbMqxWRVUAQwMI9fVrssnTfw=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 h1:RqytpXGR1iVNX7psjB3ff8y7sNFinVFvkx1c8SjBkio=
+golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b h1:9zKuko04nR4gjZ4+DNjHqRlAJqbJETHwiNKDqTfOjfE=
+golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
-google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
+google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
+gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/main.go b/main.go
index 2cd6dc2..fb66885 100644
--- a/main.go
+++ b/main.go
@@ -1,92 +1,217 @@
package main
import (
+ "archive/zip"
"context"
"encoding/json"
"flag"
"fmt"
+ "io"
"io/ioutil"
"net/http"
"os"
"os/user"
"path"
"path/filepath"
+ "strconv"
+ "strings"
+ "sync"
"time"
"github.com/gocolly/colly"
+ "github.com/schollz/progressbar/v3"
)
-var store = "~/.taldl.json"
-
-var output = flag.String("output", "epub", "output directory")
-var format = flag.String("format", "epub", "format to download")
-
-func main() {
- flag.Parse()
-
- store = fixpath(store)
- *output = fixpath(*output)
-
- os.MkdirAll(*output, os.ModePerm)
-
- oldMostRecent := ""
- newMostRecent := ""
-
- data, _ := ioutil.ReadFile(store)
- _ = json.Unmarshal(data, &oldMostRecent)
-
- done := false
-
- c := colly.NewCollector()
-
- c.OnHTML("div.amw-listing-item a[href]", func(e *colly.HTMLElement) {
- url := e.Attr("href") + "." + *format
-
- if newMostRecent == "" {
- newMostRecent = url
- }
-
- if newMostRecent == oldMostRecent {
- done = true
- return
- }
-
- fmt.Println(url)
-
- name := filenameForUrl(url)
- path := *output + "/" + name
-
- data, err := get(url, time.Second*3)
- if err != nil {
- fmt.Println(err)
- return
- }
-
- err = ioutil.WriteFile(path, data, 0644)
- if err != nil {
- fmt.Println(err)
- }
- })
-
- c.OnHTML("a[href] i.fa-chevron-right", func(e *colly.HTMLElement) {
- next, _ := e.DOM.Parent().Attr("href")
-
- if !done {
- e.Request.Visit(next)
- }
- })
-
- c.OnRequest(func(r *colly.Request) {
- fmt.Println("Checking", r.URL)
- })
-
- c.Visit("https://theanarchistlibrary.org/latest")
-
- data, _ = json.Marshal(newMostRecent)
- _ = ioutil.WriteFile(store, data, 0644)
+type Cache struct {
+ MostRecent string
+ Pages int
}
-func fixpath(path string) string {
+var cacheFile = "~/.taldl.json"
+
+var outputDir = flag.String("output", "~/TAL", "output directory")
+var formats = flag.String("formats", "zip,epub,pdf,a4.pdf,lt.pdf", "formats to download")
+var progress = flag.Bool("progress", true, "show progress bar")
+var verbose = flag.Bool("verbose", true, "verbose output")
+var workers = flag.Int("workers", 5, "amount of workers")
+var update = flag.Bool("update", false, "update all entries")
+
+var mostRecent string
+var done bool
+var cache Cache
+var useFormat map[string]bool
+
+func main() {
+ var bar *progressbar.ProgressBar
+ var lastPage int
+ var hrefs = []string{}
+ var hrefsMutex sync.Mutex
+ var done bool
+
+ flag.Parse()
+
+ useFormat = make(map[string]bool)
+ fmts := strings.Split(*formats, ",")
+ for _, f := range fmts {
+ useFormat[f] = true
+ }
+
+ cacheFile = fixPath(cacheFile)
+ cacheData, _ := ioutil.ReadFile(cacheFile)
+ _ = json.Unmarshal(cacheData, &cache)
+
+ *outputDir = fixPath(*outputDir)
+
+ os.MkdirAll(*outputDir, os.ModePerm)
+
+ c := colly.NewCollector()
+ c.OnHTML("ul.pagination li:nth-last-child(2)", func(e *colly.HTMLElement) {
+ lastPage, _ = strconv.Atoi(strings.TrimSpace(e.Text))
+ })
+ c.OnHTML("div.amw-listing-item:nth-child(1) a[href]", func(e *colly.HTMLElement) {
+ href := e.Attr("href")
+ mostRecent = href
+ })
+ c.OnHTML("div.amw-listing-item a[href]", func(e *colly.HTMLElement) {
+ href := e.Attr("href")
+ if cache.MostRecent == href {
+ done = true
+ }
+ if !done || *update {
+ hrefs = append(hrefs, href)
+ }
+ })
+
+ url := "https://theanarchistlibrary.org/latest/1"
+ c.Visit(url)
+
+ newPages := 1
+
+ if *update {
+ newPages = lastPage
+ } else {
+ newPages = lastPage - cache.Pages
+ }
+
+ if newPages > 0 {
+ if *verbose {
+ fmt.Fprintf(os.Stderr, "Checking latest entries... \r\n")
+ }
+
+ if *progress {
+ bar = progressbar.Default(int64(newPages))
+ bar.Add(1)
+ }
+ }
+
+ scanJobs := make(chan int, newPages)
+ scanResults := make(chan []string, newPages)
+
+ for w := 1; w <= *workers; w++ {
+ go scanner(w, scanJobs, scanResults)
+ }
+ for a := 2; a <= newPages; a++ {
+ scanJobs <- a
+ }
+ close(scanJobs)
+
+ for a := 2; a <= newPages; a++ {
+ result := <-scanResults
+ hrefsMutex.Lock()
+ hrefs = append(hrefs, result...)
+ hrefsMutex.Unlock()
+ if *progress {
+ bar.Add(1)
+ }
+ }
+ close(scanResults)
+
+ cache.Pages = lastPage
+ cache.MostRecent = mostRecent
+
+ numJobs := len(hrefs)
+
+ if numJobs == 0 {
+ save(cache)
+ return
+ }
+
+ if *verbose {
+ fmt.Fprintf(os.Stderr, "Checking %d entries for updates...\r\n", numJobs)
+ }
+
+ if *progress {
+ bar = progressbar.Default(int64(numJobs))
+ bar.ChangeMax(numJobs)
+ }
+
+ checkJobs := make(chan string, numJobs)
+ checkResults := make(chan string, numJobs)
+ downloadJobs := make(chan string, numJobs)
+ downloadResults := make(chan string, numJobs)
+ downloadCount := 0
+
+ for w := 1; w <= *workers; w++ {
+ go checker(w, checkJobs, checkResults)
+ }
+ for _, href := range hrefs {
+ checkJobs <- href
+ }
+ close(checkJobs)
+
+ for a := 1; a <= numJobs; a++ {
+ r := <-checkResults
+ if *progress {
+ bar.Add(1)
+ }
+ if r != "" {
+ downloadJobs <- r
+ downloadCount++
+ }
+ }
+ close(checkResults)
+ close(downloadJobs)
+
+ if downloadCount == 0 {
+ save(cache)
+ return
+ }
+
+ if *verbose {
+ fmt.Fprintf(os.Stderr, "Downloading %d entries...\r\n", downloadCount)
+ }
+
+ if *progress {
+ bar = progressbar.Default(int64(downloadCount))
+ }
+
+ for w := 1; w <= *workers; w++ {
+ go downloader(w, downloadJobs, downloadResults)
+ }
+
+ buffer := ""
+
+ for a := 1; a <= downloadCount; a++ {
+ r := <-downloadResults
+ if *progress {
+ bar.Add(1)
+ }
+ if r != "" {
+ buffer += r
+ }
+ }
+ close(downloadResults)
+
+ save(cache)
+
+ if buffer != "" {
+ if *verbose {
+ fmt.Fprintln(os.Stderr, buffer)
+ }
+ }
+}
+
+func fixPath(path string) string {
path = filepath.FromSlash(path)
if (path)[0] == '~' {
user, err := user.Current()
@@ -104,13 +229,15 @@ func filenameForUrl(url string) string {
return path.Base(url)
}
-func get(url string, timeout time.Duration) (content []byte, err error) {
- request, err := http.NewRequest("GET", url, nil)
+func check(url string, path string, timeout time.Duration) (modified bool, err error) {
+ request, err := http.NewRequest("HEAD", url, nil)
if err != nil {
return
}
- ctx, cancel_func := context.WithTimeout(context.Background(), timeout)
+ ctx, cancel := context.WithTimeout(context.Background(), timeout)
+ defer cancel()
+
request = request.WithContext(ctx)
response, err := http.DefaultClient.Do(request)
@@ -119,9 +246,65 @@ func get(url string, timeout time.Duration) (content []byte, err error) {
}
defer response.Body.Close()
+ if response.StatusCode == 404 {
+ return false, nil
+ }
+
if response.StatusCode != 200 {
- cancel_func()
- return nil, fmt.Errorf("INVALID RESPONSE: %s", response.Status)
+ return false, fmt.Errorf("%s: %s", url, response.Status)
+ }
+
+ lmh := response.Header.Get("Last-Modified")
+ lm, err := http.ParseTime(lmh)
+ if err != nil {
+ return true, err
+ }
+
+ file, err := os.Stat(path)
+ if err != nil {
+ return true, nil
+ }
+
+ clh := response.Header.Get("Content-Length")
+ cl, err := strconv.ParseInt(clh, 10, 0)
+ if err != nil {
+ return true, nil
+ }
+
+ if file.Size() != cl {
+ return true, nil
+ }
+
+ if lm.After(file.ModTime()) {
+ return true, nil
+ }
+
+ return false, nil
+}
+
+func get(url string, timeout time.Duration) (content []byte, err error) {
+ request, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ return
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), timeout)
+ defer cancel()
+
+ request = request.WithContext(ctx)
+
+ response, err := http.DefaultClient.Do(request)
+ if err != nil {
+ return
+ }
+ defer response.Body.Close()
+
+ if response.StatusCode == 404 {
+ return
+ }
+
+ if response.StatusCode != 200 {
+ return nil, fmt.Errorf("%s: %s", url, response.Status)
}
return ioutil.ReadAll(response.Body)
@@ -134,3 +317,202 @@ func fileExists(path string) bool {
}
return !info.IsDir()
}
+
+func download(url string, path string) (err error) {
+ for {
+ data, err := get(url, time.Second*60)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s: %s\n", url, err)
+ time.Sleep(time.Second)
+ continue
+ }
+
+ err = ioutil.WriteFile(path, data, 0644)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ return err
+ }
+ break
+ }
+
+ return nil
+}
+
+func unzip(src string, dest string) ([]string, error) {
+ var filenames []string
+
+ r, err := zip.OpenReader(src)
+ if err != nil {
+ return filenames, err
+ }
+ defer r.Close()
+
+ for _, f := range r.File {
+ fpath := filepath.Join(dest, f.Name)
+
+ if !strings.HasPrefix(fpath, filepath.Clean(dest)+string(os.PathSeparator)) {
+ return filenames, fmt.Errorf("%s: bad file path", fpath)
+ }
+
+ filenames = append(filenames, fpath)
+
+ if f.FileInfo().IsDir() {
+ os.MkdirAll(fpath, os.ModePerm)
+ continue
+ }
+
+ if err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil {
+ return filenames, err
+ }
+
+ outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+ if err != nil {
+ return filenames, err
+ }
+
+ rc, err := f.Open()
+ if err != nil {
+ return filenames, err
+ }
+
+ _, err = io.Copy(outFile, rc)
+
+ outFile.Close()
+ rc.Close()
+
+ if err != nil {
+ return filenames, err
+ }
+ }
+ return filenames, nil
+}
+
+func downloader(id int, jobs <-chan string, results chan<- string) {
+
+ for href := range jobs {
+ result := ""
+ dir := filenameForUrl(href)
+ dest := filepath.Join(*outputDir, dir)
+ os.MkdirAll(dest, 0700)
+
+ downloadFormat(href, "zip", dest)
+ downloadFormat(href, "epub", dest)
+ downloadFormat(href, "pdf", dest)
+ downloadFormat(href, "a4.pdf", dest)
+ downloadFormat(href, "lt.pdf", dest)
+
+ if *verbose {
+ results <- result
+ } else {
+ results <- ""
+ }
+ }
+}
+
+func scanner(id int, jobs <-chan int, results chan<- []string) {
+ var result []string
+
+ c := colly.NewCollector()
+ c.OnHTML("div.amw-listing-item a[href]", func(e *colly.HTMLElement) {
+ href := e.Attr("href")
+ if cache.MostRecent == href {
+ done = true
+ }
+ if !done {
+ result = append(result, href)
+ }
+ })
+ c.AllowURLRevisit = true
+
+ for i := range jobs {
+ url := fmt.Sprintf("https://theanarchistlibrary.org/latest/%d", i)
+ for {
+ result = []string{}
+ err := c.Visit(url)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ time.Sleep(time.Second)
+ } else {
+ break
+ }
+ }
+ results <- result
+ }
+}
+
+func checker(id int, jobs <-chan string, results chan<- string) {
+ var modified bool
+ var err error
+
+ for href := range jobs {
+ ext := "muse"
+ url := href + "." + ext
+ dir := filenameForUrl(href)
+ name := filenameForUrl(url)
+ path := filepath.Join(*outputDir, dir, name)
+
+ if !fileExists(path) {
+ results <- href
+ continue
+ }
+
+ for {
+ modified, err = check(url, path, time.Second*30)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ time.Sleep(time.Second)
+ continue
+ }
+ break
+ }
+
+ if modified {
+ results <- href
+ } else {
+ results <- ""
+ }
+ }
+}
+
+func save(cache Cache) {
+ cacheData, _ := json.Marshal(&cache)
+ _ = ioutil.WriteFile(cacheFile, cacheData, 0644)
+}
+
+func downloadFormat(href string, ext string, dest string) error {
+ if !useFormat[ext] {
+ return nil
+ }
+
+ url := href + "." + ext
+ name := filenameForUrl(url)
+
+ if ext == "zip" {
+ tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl")
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ return err
+ }
+ path := filepath.Join(tmpDir, name)
+ err = download(url, path)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ return err
+ }
+ _, err = unzip(path, *outputDir)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ return err
+ }
+ os.RemoveAll(tmpDir)
+ } else {
+ path := filepath.Join(dest, name)
+ err := download(url, path)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "error: %s\n", err)
+ return err
+ }
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitattributes b/vendor/github.com/PuerkitoBio/goquery/.gitattributes
new file mode 100644
index 0000000..0cc26ec
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/.gitattributes
@@ -0,0 +1 @@
+testdata/* linguist-vendored
diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitignore b/vendor/github.com/PuerkitoBio/goquery/.gitignore
new file mode 100644
index 0000000..970381c
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/.gitignore
@@ -0,0 +1,16 @@
+# editor temporary files
+*.sublime-*
+.DS_Store
+*.swp
+#*.*#
+tags
+
+# direnv config
+.env*
+
+# test binaries
+*.test
+
+# coverage and profilte outputs
+*.out
+
diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
new file mode 100644
index 0000000..8430c86
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/.travis.yml
@@ -0,0 +1,31 @@
+arch:
+ - amd64
+ - ppc64le
+language: go
+
+go:
+ - 1.7.x
+ - 1.8.x
+ - 1.9.x
+ - 1.10.x
+ - 1.11.x
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - 1.15.x
+ - tip
+
+jobs:
+ exclude:
+ - arch: ppc64le
+ go: 1.7.x
+ - arch: ppc64le
+ go: 1.8.x
+ - arch: ppc64le
+ go: 1.9.x
+ - arch: ppc64le
+ go: 1.10.x
+ - arch: ppc64le
+ go: 1.11.x
+ - arch: ppc64le
+ go: 1.12.x
diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE
new file mode 100644
index 0000000..25372c2
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE
@@ -0,0 +1,12 @@
+Copyright (c) 2012-2021, Martin Angers & Contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md
new file mode 100644
index 0000000..924cef7
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/README.md
@@ -0,0 +1,190 @@
+# goquery - a little like that j-thing, only in Go
+[](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?) [](http://travis-ci.org/PuerkitoBio/goquery) [](https://pkg.go.dev/github.com/PuerkitoBio/goquery) [](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
+
+goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
+
+Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. See the [wiki][] for various options to do this.
+
+Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...).
+
+## Table of Contents
+
+* [Installation](#installation)
+* [Changelog](#changelog)
+* [API](#api)
+* [Examples](#examples)
+* [Related Projects](#related-projects)
+* [Support](#support)
+* [License](#license)
+
+## Installation
+
+Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
+
+ $ go get github.com/PuerkitoBio/goquery
+
+(optional) To run unit tests:
+
+ $ cd $GOPATH/src/github.com/PuerkitoBio/goquery
+ $ go test
+
+(optional) To run benchmarks (warning: it runs for a few minutes):
+
+ $ cd $GOPATH/src/github.com/PuerkitoBio/goquery
+ $ go test -bench=".*"
+
+## Changelog
+
+**Note that goquery's API is now stable, and will not break.**
+
+* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
+* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
+* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
+* **2020-02-04 (v1.5.1)** : Update module dependencies.
+* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
+* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
+* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
+* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue).
+* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins).
+* **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv).
+* **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb).
+* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
+* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
+* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
+* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
+* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
+* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
+* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
+* **2014-11-07** : Add manipulation functions (thanks to [Andrew Stone][thatguystone]) and `*Matcher` functions, that receive compiled cascadia selectors instead of selector strings, thus avoiding potential panics thrown by goquery via `cascadia.MustCompile` calls. This results in better performance (selectors can be compiled once and reused) and more idiomatic error handling (you can handle cascadia's compilation errors, instead of recovering from panics, which had been bugging me for a long time). Note that the actual type expected is a `Matcher` interface, that `cascadia.Selector` implements. Other matcher implementations could be used.
+* **2014-11-06** : Change import paths of net/html to golang.org/x/net/html (see https://groups.google.com/forum/#!topic/golang-nuts/eD8dh3T9yyA). Make sure to update your code to use the new import path too when you call goquery with `html.Node`s.
+* **v0.3.2** : Add `NewDocumentFromReader()` (thanks jweir) which allows creating a goquery document from an io.Reader.
+* **v0.3.1** : Add `NewDocumentFromResponse()` (thanks assassingj) which allows creating a goquery document from an http response.
+* **v0.3.0** : Add `EachWithBreak()` which allows to break out of an `Each()` loop by returning false. This function was added instead of changing the existing `Each()` to avoid breaking compatibility.
+* **v0.2.1** : Make go-getable, now that [go.net/html is Go1.0-compatible][gonet] (thanks to @matrixik for pointing this out).
+* **v0.2.0** : Add support for negative indices in Slice(). **BREAKING CHANGE** `Document.Root` is removed, `Document` is now a `Selection` itself (a selection of one, the root element, just like `Document.Root` was before). Add jQuery's Closest() method.
+* **v0.1.1** : Add benchmarks to use as baseline for refactorings, refactor Next...() and Prev...() methods to use the new html package's linked list features (Next/PrevSibling, FirstChild). Good performance boost (40+% in some cases).
+* **v0.1.0** : Initial release.
+
+## API
+
+goquery exposes two structs, `Document` and `Selection`, and the `Matcher` interface. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, goquery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` type is for. It holds the root document node as the initial Selection value to manipulate.
+
+jQuery often has many variants for the same function (no argument, a selector string argument, a jQuery object argument, a DOM element argument, ...). Instead of exposing the same features in goquery as a single method with variadic empty interface arguments, statically-typed signatures are used following this naming convention:
+
+* When the jQuery equivalent can be called with no argument, it has the same name as jQuery for the no argument signature (e.g.: `Prev()`), and the version with a selector string argument is called `XxxFiltered()` (e.g.: `PrevFiltered()`)
+* When the jQuery equivalent **requires** one argument, the same name as jQuery is used for the selector string version (e.g.: `Is()`)
+* The signatures accepting a jQuery object as argument are defined in goquery as `XxxSelection()` and take a `*Selection` object as argument (e.g.: `FilterSelection()`)
+* The signatures accepting a DOM element as argument in jQuery are defined in goquery as `XxxNodes()` and take a variadic argument of type `*html.Node` (e.g.: `FilterNodes()`)
+* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
+* The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`)
+
+Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
+
+The complete [package reference documentation can be found here][doc].
+
+Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
+
+* `Find("~")` returns an empty selection because the selector string doesn't match anything.
+* `Add("~")` returns a new selection that holds the same nodes as the original selection, because it didn't add any node (selector string didn't match anything).
+* `ParentsFiltered("~")` returns an empty selection because the selector string doesn't match anything.
+* `ParentsUntil("~")` returns all parents of the selection because the selector string didn't match any element to stop before the top element.
+
+## Examples
+
+See some tips and tricks in the [wiki][].
+
+Adapted from example_test.go:
+
+```Go
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/http"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+func ExampleScrape() {
+ // Request the HTML page.
+ res, err := http.Get("http://metalsucks.net")
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer res.Body.Close()
+ if res.StatusCode != 200 {
+ log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
+ }
+
+ // Load the HTML document
+ doc, err := goquery.NewDocumentFromReader(res.Body)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Find the review items
+ doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
+ // For each item found, get the title
+ title := s.Find("a").Text()
+ fmt.Printf("Review %d: %s\n", i, title)
+ })
+}
+
+func main() {
+ ExampleScrape()
+}
+```
+
+## Related Projects
+
+- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
+- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
+- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
+- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
+- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
+- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
+- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
+- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
+- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
+- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
+- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
+
+## Support
+
+There are a number of ways you can support the project:
+
+* Use it, star it, build something with it, spread the word!
+ - If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section!
+* Raise issues to improve the project (note: doc typos and clarifications are issues too!)
+ - Please search existing issues before opening a new one - it may have already been adressed.
+* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
+ - Make sure new code is tested.
+ - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
+* Sponsor the developer
+ - See the Github Sponsor button at the top of the repo on github
+ - or via BuyMeACoffee.com, below
+
+
+
+## License
+
+The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic].
+
+[jquery]: http://jquery.com/
+[go]: http://golang.org/
+[cascadia]: https://github.com/andybalholm/cascadia
+[cascadiacli]: https://github.com/suntong/cascadia
+[bsd]: http://opensource.org/licenses/BSD-3-Clause
+[golic]: http://golang.org/LICENSE
+[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
+[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
+[index]: http://api.jquery.com/index/
+[gonet]: https://github.com/golang/net/
+[html]: https://pkg.go.dev/golang.org/x/net/html
+[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
+[thatguystone]: https://github.com/thatguystone
+[piotr]: https://github.com/piotrkowalczuk
+[goq]: https://github.com/andrewstuart/goq
+[thiemok]: https://github.com/thiemok
+[djw]: https://github.com/davidjwilkins
diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go
new file mode 100644
index 0000000..1b1f6cb
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/array.go
@@ -0,0 +1,124 @@
+package goquery
+
+import (
+ "golang.org/x/net/html"
+)
+
+const (
+ maxUint = ^uint(0)
+ maxInt = int(maxUint >> 1)
+
+ // ToEnd is a special index value that can be used as end index in a call
+ // to Slice so that all elements are selected until the end of the Selection.
+ // It is equivalent to passing (*Selection).Length().
+ ToEnd = maxInt
+)
+
+// First reduces the set of matched elements to the first in the set.
+// It returns a new Selection object, and an empty Selection object if the
+// the selection is empty.
+func (s *Selection) First() *Selection {
+ return s.Eq(0)
+}
+
+// Last reduces the set of matched elements to the last in the set.
+// It returns a new Selection object, and an empty Selection object if
+// the selection is empty.
+func (s *Selection) Last() *Selection {
+ return s.Eq(-1)
+}
+
+// Eq reduces the set of matched elements to the one at the specified index.
+// If a negative index is given, it counts backwards starting at the end of the
+// set. It returns a new Selection object, and an empty Selection object if the
+// index is invalid.
+func (s *Selection) Eq(index int) *Selection {
+ if index < 0 {
+ index += len(s.Nodes)
+ }
+
+ if index >= len(s.Nodes) || index < 0 {
+ return newEmptySelection(s.document)
+ }
+
+ return s.Slice(index, index+1)
+}
+
+// Slice reduces the set of matched elements to a subset specified by a range
+// of indices. The start index is 0-based and indicates the index of the first
+// element to select. The end index is 0-based and indicates the index at which
+// the elements stop being selected (the end index is not selected).
+//
+// The indices may be negative, in which case they represent an offset from the
+// end of the selection.
+//
+// The special value ToEnd may be specified as end index, in which case all elements
+// until the end are selected. This works both for a positive and negative start
+// index.
+func (s *Selection) Slice(start, end int) *Selection {
+ if start < 0 {
+ start += len(s.Nodes)
+ }
+ if end == ToEnd {
+ end = len(s.Nodes)
+ } else if end < 0 {
+ end += len(s.Nodes)
+ }
+ return pushStack(s, s.Nodes[start:end])
+}
+
+// Get retrieves the underlying node at the specified index.
+// Get without parameter is not implemented, since the node array is available
+// on the Selection object.
+func (s *Selection) Get(index int) *html.Node {
+ if index < 0 {
+ index += len(s.Nodes) // Negative index gets from the end
+ }
+ return s.Nodes[index]
+}
+
+// Index returns the position of the first element within the Selection object
+// relative to its sibling elements.
+func (s *Selection) Index() int {
+ if len(s.Nodes) > 0 {
+ return newSingleSelection(s.Nodes[0], s.document).PrevAll().Length()
+ }
+ return -1
+}
+
+// IndexSelector returns the position of the first element within the
+// Selection object relative to the elements matched by the selector, or -1 if
+// not found.
+func (s *Selection) IndexSelector(selector string) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.Find(selector)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexMatcher returns the position of the first element within the
+// Selection object relative to the elements matched by the matcher, or -1 if
+// not found.
+func (s *Selection) IndexMatcher(m Matcher) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.FindMatcher(m)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexOfNode returns the position of the specified node within the Selection
+// object, or -1 if not found.
+func (s *Selection) IndexOfNode(node *html.Node) int {
+ return indexInSlice(s.Nodes, node)
+}
+
+// IndexOfSelection returns the position of the first node in the specified
+// Selection object within this Selection object, or -1 if not found.
+func (s *Selection) IndexOfSelection(sel *Selection) int {
+ if sel != nil && len(sel.Nodes) > 0 {
+ return indexInSlice(s.Nodes, sel.Nodes[0])
+ }
+ return -1
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/doc.go b/vendor/github.com/PuerkitoBio/goquery/doc.go
new file mode 100644
index 0000000..71146a7
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/doc.go
@@ -0,0 +1,123 @@
+// Copyright (c) 2012-2016, Martin Angers & Contributors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.
+// * Neither the name of the author nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+Package goquery implements features similar to jQuery, including the chainable
+syntax, to manipulate and query an HTML document.
+
+It brings a syntax and a set of features similar to jQuery to the Go language.
+It is based on Go's net/html package and the CSS Selector library cascadia.
+Since the net/html parser returns nodes, and not a full-featured DOM
+tree, jQuery's stateful manipulation functions (like height(), css(), detach())
+have been left off.
+
+Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is
+the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
+See the repository's wiki for various options on how to do this.
+
+Syntax-wise, it is as close as possible to jQuery, with the same method names when
+possible, and that warm and fuzzy chainable interface. jQuery being the
+ultra-popular library that it is, writing a similar HTML-manipulating
+library was better to follow its API than to start anew (in the same spirit as
+Go's fmt package), even though some of its methods are less than intuitive (looking
+at you, index()...).
+
+It is hosted on GitHub, along with additional documentation in the README.md
+file: https://github.com/puerkitobio/goquery
+
+Please note that because of the net/html dependency, goquery requires Go1.1+.
+
+The various methods are split into files based on the category of behavior.
+The three dots (...) indicate that various "overloads" are available.
+
+* array.go : array-like positional manipulation of the selection.
+ - Eq()
+ - First()
+ - Get()
+ - Index...()
+ - Last()
+ - Slice()
+
+* expand.go : methods that expand or augment the selection's set.
+ - Add...()
+ - AndSelf()
+ - Union(), which is an alias for AddSelection()
+
+* filter.go : filtering methods, that reduce the selection's set.
+ - End()
+ - Filter...()
+ - Has...()
+ - Intersection(), which is an alias of FilterSelection()
+ - Not...()
+
+* iteration.go : methods to loop over the selection's nodes.
+ - Each()
+ - EachWithBreak()
+ - Map()
+
+* manipulation.go : methods for modifying the document
+ - After...()
+ - Append...()
+ - Before...()
+ - Clone()
+ - Empty()
+ - Prepend...()
+ - Remove...()
+ - ReplaceWith...()
+ - Unwrap()
+ - Wrap...()
+ - WrapAll...()
+ - WrapInner...()
+
+* property.go : methods that inspect and get the node's properties values.
+ - Attr*(), RemoveAttr(), SetAttr()
+ - AddClass(), HasClass(), RemoveClass(), ToggleClass()
+ - Html()
+ - Length()
+ - Size(), which is an alias for Length()
+ - Text()
+
+* query.go : methods that query, or reflect, a node's identity.
+ - Contains()
+ - Is...()
+
+* traversal.go : methods to traverse the HTML document tree.
+ - Children...()
+ - Contents()
+ - Find...()
+ - Next...()
+ - Parent[s]...()
+ - Prev...()
+ - Siblings...()
+
+* type.go : definition of the types exposed by goquery.
+ - Document
+ - Selection
+ - Matcher
+
+* utilities.go : definition of helper functions (and not methods on a *Selection)
+that are not part of jQuery, but are useful to goquery.
+ - NodeName
+ - OuterHtml
+*/
+package goquery
diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go
new file mode 100644
index 0000000..7caade5
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/expand.go
@@ -0,0 +1,70 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Add adds the selector string's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The selector string is run in the context of the document of the current
+// Selection object.
+func (s *Selection) Add(selector string) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, compileMatcher(selector))...)
+}
+
+// AddMatcher adds the matcher's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The matcher is run in the context of the document of the current
+// Selection object.
+func (s *Selection) AddMatcher(m Matcher) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, m)...)
+}
+
+// AddSelection adds the specified Selection object's nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.AddNodes()
+ }
+ return s.AddNodes(sel.Nodes...)
+}
+
+// Union is an alias for AddSelection.
+func (s *Selection) Union(sel *Selection) *Selection {
+ return s.AddSelection(sel)
+}
+
+// AddNodes adds the specified nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, appendWithoutDuplicates(s.Nodes, nodes, nil))
+}
+
+// AndSelf adds the previous set of elements on the stack to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the previous one.
+// Deprecated: This function has been deprecated and is now an alias for AddBack().
+func (s *Selection) AndSelf() *Selection {
+ return s.AddBack()
+}
+
+// AddBack adds the previous set of elements on the stack to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the previous one.
+func (s *Selection) AddBack() *Selection {
+ return s.AddSelection(s.prevSel)
+}
+
+// AddBackFiltered reduces the previous set of elements on the stack to those that
+// match the selector string, and adds them to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one
+func (s *Selection) AddBackFiltered(selector string) *Selection {
+ return s.AddSelection(s.prevSel.Filter(selector))
+}
+
+// AddBackMatcher reduces the previous set of elements on the stack to those that match
+// the mateher, and adds them to the curernt set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one
+func (s *Selection) AddBackMatcher(m Matcher) *Selection {
+ return s.AddSelection(s.prevSel.FilterMatcher(m))
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/filter.go b/vendor/github.com/PuerkitoBio/goquery/filter.go
new file mode 100644
index 0000000..9138ffb
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/filter.go
@@ -0,0 +1,163 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Filter reduces the set of matched elements to those that match the selector string.
+// It returns a new Selection object for this subset of matching elements.
+func (s *Selection) Filter(selector string) *Selection {
+ return s.FilterMatcher(compileMatcher(selector))
+}
+
+// FilterMatcher reduces the set of matched elements to those that match
+// the given matcher. It returns a new Selection object for this subset
+// of matching elements.
+func (s *Selection) FilterMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, true))
+}
+
+// Not removes elements from the Selection that match the selector string.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) Not(selector string) *Selection {
+ return s.NotMatcher(compileMatcher(selector))
+}
+
+// NotMatcher removes elements from the Selection that match the given matcher.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, false))
+}
+
+// FilterFunction reduces the set of matched elements to those that pass the function's test.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, true))
+}
+
+// NotFunction removes elements from the Selection that pass the function's test.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, false))
+}
+
+// FilterNodes reduces the set of matched elements to those that match the specified nodes.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, true))
+}
+
+// NotNodes removes elements from the Selection that match the specified nodes.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, false))
+}
+
+// FilterSelection reduces the set of matched elements to those that match a
+// node in the specified Selection object.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, true))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, true))
+}
+
+// NotSelection removes elements from the Selection that match a node in the specified
+// Selection object. It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, false))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, false))
+}
+
+// Intersection is an alias for FilterSelection.
+func (s *Selection) Intersection(sel *Selection) *Selection {
+ return s.FilterSelection(sel)
+}
+
+// Has reduces the set of matched elements to those that have a descendant
+// that matches the selector.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) Has(selector string) *Selection {
+ return s.HasSelection(s.document.Find(selector))
+}
+
+// HasMatcher reduces the set of matched elements to those that have a descendant
+// that matches the matcher.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasMatcher(m Matcher) *Selection {
+ return s.HasSelection(s.document.FindMatcher(m))
+}
+
+// HasNodes reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasNodes(nodes ...*html.Node) *Selection {
+ return s.FilterFunction(func(_ int, sel *Selection) bool {
+ // Add all nodes that contain one of the specified nodes
+ for _, n := range nodes {
+ if sel.Contains(n) {
+ return true
+ }
+ }
+ return false
+ })
+}
+
+// HasSelection reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes of the specified Selection object.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.HasNodes()
+ }
+ return s.HasNodes(sel.Nodes...)
+}
+
+// End ends the most recent filtering operation in the current chain and
+// returns the set of matched elements to its previous state.
+func (s *Selection) End() *Selection {
+ if s.prevSel != nil {
+ return s.prevSel
+ }
+ return newEmptySelection(s.document)
+}
+
+// Filter based on the matcher, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnow(sel *Selection, m Matcher, keep bool) []*html.Node {
+ // Optimize if keep is requested
+ if keep {
+ return m.Filter(sel.Nodes)
+ }
+ // Use grep
+ return grep(sel, func(i int, s *Selection) bool {
+ return !m.Match(s.Get(0))
+ })
+}
+
+// Filter based on an array of nodes, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node {
+ if len(nodes)+len(sel.Nodes) < minNodesForSet {
+ return grep(sel, func(i int, s *Selection) bool {
+ return isInSlice(nodes, s.Get(0)) == keep
+ })
+ }
+
+ set := make(map[*html.Node]bool)
+ for _, n := range nodes {
+ set[n] = true
+ }
+ return grep(sel, func(i int, s *Selection) bool {
+ return set[s.Get(0)] == keep
+ })
+}
+
+// Filter based on a function test, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowFunction(sel *Selection, f func(int, *Selection) bool, keep bool) []*html.Node {
+ return grep(sel, func(i int, s *Selection) bool {
+ return f(i, s) == keep
+ })
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod
new file mode 100644
index 0000000..95826ad
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/go.mod
@@ -0,0 +1,8 @@
+module github.com/PuerkitoBio/goquery
+
+require (
+ github.com/andybalholm/cascadia v1.1.0
+ golang.org/x/net v0.0.0-20200202094626-16171245cfb2
+)
+
+go 1.13
diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum
new file mode 100644
index 0000000..bc79107
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/go.sum
@@ -0,0 +1,8 @@
+github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
+github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/vendor/github.com/PuerkitoBio/goquery/iteration.go b/vendor/github.com/PuerkitoBio/goquery/iteration.go
new file mode 100644
index 0000000..e246f2e
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/iteration.go
@@ -0,0 +1,39 @@
+package goquery
+
+// Each iterates over a Selection object, executing a function for each
+// matched element. It returns the current Selection object. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Each(f func(int, *Selection)) *Selection {
+ for i, n := range s.Nodes {
+ f(i, newSingleSelection(n, s.document))
+ }
+ return s
+}
+
+// EachWithBreak iterates over a Selection object, executing a function for each
+// matched element. It is identical to Each except that it is possible to break
+// out of the loop by returning false in the callback function. It returns the
+// current Selection object.
+func (s *Selection) EachWithBreak(f func(int, *Selection) bool) *Selection {
+ for i, n := range s.Nodes {
+ if !f(i, newSingleSelection(n, s.document)) {
+ return s
+ }
+ }
+ return s
+}
+
+// Map passes each element in the current matched set through a function,
+// producing a slice of string holding the returned values. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Map(f func(int, *Selection) string) (result []string) {
+ for i, n := range s.Nodes {
+ result = append(result, f(i, newSingleSelection(n, s.document)))
+ }
+
+ return result
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
new file mode 100644
index 0000000..35febf1
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
@@ -0,0 +1,679 @@
+package goquery
+
+import (
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// After applies the selector from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) After(selector string) *Selection {
+ return s.AfterMatcher(compileMatcher(selector))
+}
+
+// AfterMatcher applies the matcher from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterMatcher(m Matcher) *Selection {
+ return s.AfterNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AfterSelection inserts the elements in the selection after each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterSelection(sel *Selection) *Selection {
+ return s.AfterNodes(sel.Nodes...)
+}
+
+// AfterHtml parses the html and inserts it after the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
+}
+
+// AfterNodes inserts the nodes after each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn.NextSibling)
+ }
+ })
+}
+
+// Append appends the elements specified by the selector to the end of each element
+// in the set of matched elements, following those rules:
+//
+// 1) The selector is applied to the root document.
+//
+// 2) Elements that are part of the document will be moved to the new location.
+//
+// 3) If there are multiple locations to append to, cloned nodes will be
+// appended to all target locations except the last one, which will be moved
+// as noted in (2).
+func (s *Selection) Append(selector string) *Selection {
+ return s.AppendMatcher(compileMatcher(selector))
+}
+
+// AppendMatcher appends the elements specified by the matcher to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendMatcher(m Matcher) *Selection {
+ return s.AppendNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AppendSelection appends the elements in the selection to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendSelection(sel *Selection) *Selection {
+ return s.AppendNodes(sel.Nodes...)
+}
+
+// AppendHtml parses the html and appends it to the set of matched elements.
+func (s *Selection) AppendHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
+}
+
+// AppendNodes appends the specified nodes to each node in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ sn.AppendChild(n)
+ })
+}
+
+// Before inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) Before(selector string) *Selection {
+ return s.BeforeMatcher(compileMatcher(selector))
+}
+
+// BeforeMatcher inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeMatcher(m Matcher) *Selection {
+ return s.BeforeNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// BeforeSelection inserts the elements in the selection before each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeSelection(sel *Selection) *Selection {
+ return s.BeforeNodes(sel.Nodes...)
+}
+
+// BeforeHtml parses the html and inserts it before the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, node)
+ }
+ }
+ })
+}
+
+// BeforeNodes inserts the nodes before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn)
+ }
+ })
+}
+
+// Clone creates a deep copy of the set of matched nodes. The new nodes will not be
+// attached to the document.
+func (s *Selection) Clone() *Selection {
+ ns := newEmptySelection(s.document)
+ ns.Nodes = cloneNodes(s.Nodes)
+ return ns
+}
+
+// Empty removes all children nodes from the set of matched elements.
+// It returns the children nodes in a new Selection.
+func (s *Selection) Empty() *Selection {
+ var nodes []*html.Node
+
+ for _, n := range s.Nodes {
+ for c := n.FirstChild; c != nil; c = n.FirstChild {
+ n.RemoveChild(c)
+ nodes = append(nodes, c)
+ }
+ }
+
+ return pushStack(s, nodes)
+}
+
+// Prepend prepends the elements specified by the selector to each element in
+// the set of matched elements, following the same rules as Append.
+func (s *Selection) Prepend(selector string) *Selection {
+ return s.PrependMatcher(compileMatcher(selector))
+}
+
+// PrependMatcher prepends the elements specified by the matcher to each
+// element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependMatcher(m Matcher) *Selection {
+ return s.PrependNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// PrependSelection prepends the elements in the selection to each element in
+// the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependSelection(sel *Selection) *Selection {
+ return s.PrependNodes(sel.Nodes...)
+}
+
+// PrependHtml parses the html and prepends it to the set of matched elements.
+func (s *Selection) PrependHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ firstChild := node.FirstChild
+ for _, n := range nodes {
+ node.InsertBefore(n, firstChild)
+ }
+ })
+}
+
+// PrependNodes prepends the specified nodes to each node in the set of
+// matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ // sn.FirstChild may be nil, in which case this functions like
+ // sn.AppendChild()
+ sn.InsertBefore(n, sn.FirstChild)
+ })
+}
+
+// Remove removes the set of matched elements from the document.
+// It returns the same selection, now consisting of nodes not in the document.
+func (s *Selection) Remove() *Selection {
+ for _, n := range s.Nodes {
+ if n.Parent != nil {
+ n.Parent.RemoveChild(n)
+ }
+ }
+
+ return s
+}
+
+// RemoveFiltered removes from the current set of matched elements those that
+// match the selector filter. It returns the Selection of removed nodes.
+//
+// For example if the selection s contains "
COPYRIGHT AND PERMISSION NOTICE
+ ++Copyright (c) 1995-2012 International Business Machines Corporation and others +
++All rights reserved. +
++Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies +of the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. +
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, +OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE +USE OR PERFORMANCE OF THIS SOFTWARE. +
++Except as contained in this notice, the name of a copyright holder shall not be +used in advertising or otherwise to promote the sale, use or other dealings in +this Software without prior written authorization of the copyright holder. +
+ ++All trademarks and registered trademarks mentioned herein are the property of their respective owners. +
+ + diff --git a/vendor/github.com/saintfish/chardet/multi_byte.go b/vendor/github.com/saintfish/chardet/multi_byte.go new file mode 100644 index 0000000..1fab34c --- /dev/null +++ b/vendor/github.com/saintfish/chardet/multi_byte.go @@ -0,0 +1,345 @@ +package chardet + +import ( + "errors" + "math" +) + +type recognizerMultiByte struct { + charset string + language string + decoder charDecoder + commonChars []uint16 +} + +type charDecoder interface { + DecodeOneChar([]byte) (c uint16, remain []byte, err error) +} + +func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) { + return recognizerOutput{ + Charset: r.charset, + Language: r.language, + Confidence: r.matchConfidence(input), + } +} + +func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int { + raw := input.raw + var c uint16 + var err error + var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int + for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) { + totalCharCount++ + if err != nil { + badCharCount++ + } else if c <= 0xFF { + singleByteCharCount++ + } else { + doubleByteCharCount++ + if r.commonChars != nil && binarySearch(r.commonChars, c) { + commonCharCount++ + } + } + if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount { + return 0 + } + } + + if doubleByteCharCount <= 10 && badCharCount == 0 { + if doubleByteCharCount == 0 && totalCharCount < 10 { + return 0 + } else { + return 10 + } + } + + if doubleByteCharCount < 20*badCharCount { + return 0 + } + if r.commonChars == nil { + confidence := 30 + doubleByteCharCount - 20*badCharCount + if confidence > 100 { + confidence = 100 + } + return confidence + } + maxVal := math.Log(float64(doubleByteCharCount) / 4) + scaleFactor := 90 / maxVal + confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10) + if confidence > 100 { + confidence = 100 + } + if confidence < 0 { + confidence = 0 + } + return confidence +} + +func binarySearch(l []uint16, c uint16) bool { + start := 0 + end := len(l) - 1 + for start <= end { + mid := (start + end) / 2 + if c == l[mid] { + return true + } else if c < l[mid] { + end = mid - 1 + } else { + start = mid + 1 + } + } + return false +} + +var eobError = errors.New("End of input buffer") +var badCharError = errors.New("Decode a bad char") + +type charDecoder_sjis struct { +} + +func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + c = uint16(first) + remain = input[1:] + if first <= 0x7F || (first > 0xA0 && first <= 0xDF) { + return + } + if len(remain) == 0 { + return c, remain, badCharError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) { + } else { + err = badCharError + } + return +} + +var commonChars_sjis = []uint16{ + 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, + 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, + 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, + 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, + 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, + 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa, +} + +func newRecognizer_sjis() *recognizerMultiByte { + return &recognizerMultiByte{ + "Shift_JIS", + "ja", + charDecoder_sjis{}, + commonChars_sjis, + } +} + +type charDecoder_euc struct { +} + +func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + remain = input[1:] + c = uint16(first) + if first <= 0x8D { + return uint16(first), remain, nil + } + if len(remain) == 0 { + return 0, nil, eobError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if first >= 0xA1 && first <= 0xFE { + if second < 0xA1 { + err = badCharError + } + return + } + if first == 0x8E { + if second < 0xA1 { + err = badCharError + } + return + } + if first == 0x8F { + if len(remain) == 0 { + return 0, nil, eobError + } + third := remain[0] + remain = remain[1:] + c = c<<0 | uint16(third) + if third < 0xa1 { + err = badCharError + } + } + return +} + +var commonChars_euc_jp = []uint16{ + 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, + 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, + 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, + 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, + 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, + 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, + 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, + 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, + 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, + 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1, +} + +var commonChars_euc_kr = []uint16{ + 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, + 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, + 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, + 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, + 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, + 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, + 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, + 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, + 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, + 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad, +} + +func newRecognizer_euc_jp() *recognizerMultiByte { + return &recognizerMultiByte{ + "EUC-JP", + "ja", + charDecoder_euc{}, + commonChars_euc_jp, + } +} + +func newRecognizer_euc_kr() *recognizerMultiByte { + return &recognizerMultiByte{ + "EUC-KR", + "ko", + charDecoder_euc{}, + commonChars_euc_kr, + } +} + +type charDecoder_big5 struct { +} + +func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + remain = input[1:] + c = uint16(first) + if first <= 0x7F || first == 0xFF { + return + } + if len(remain) == 0 { + return c, nil, eobError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if second < 0x40 || second == 0x7F || second == 0xFF { + err = badCharError + } + return +} + +var commonChars_big5 = []uint16{ + 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, + 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, + 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, + 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, + 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, + 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, + 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, + 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, + 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, + 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f, +} + +func newRecognizer_big5() *recognizerMultiByte { + return &recognizerMultiByte{ + "Big5", + "zh", + charDecoder_big5{}, + commonChars_big5, + } +} + +type charDecoder_gb_18030 struct { +} + +func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { + if len(input) == 0 { + return 0, nil, eobError + } + first := input[0] + remain = input[1:] + c = uint16(first) + if first <= 0x80 { + return + } + if len(remain) == 0 { + return 0, nil, eobError + } + second := remain[0] + remain = remain[1:] + c = c<<8 | uint16(second) + if first >= 0x81 && first <= 0xFE { + if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) { + return + } + + if second >= 0x30 && second <= 0x39 { + if len(remain) == 0 { + return 0, nil, eobError + } + third := remain[0] + remain = remain[1:] + if third >= 0x81 && third <= 0xFE { + if len(remain) == 0 { + return 0, nil, eobError + } + fourth := remain[0] + remain = remain[1:] + if fourth >= 0x30 && fourth <= 0x39 { + c = c<<16 | uint16(third)<<8 | uint16(fourth) + return + } + } + } + err = badCharError + } + return +} + +var commonChars_gb_18030 = []uint16{ + 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, + 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, + 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, + 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, + 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, + 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, + 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, + 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, + 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, + 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0, +} + +func newRecognizer_gb_18030() *recognizerMultiByte { + return &recognizerMultiByte{ + "GB-18030", + "zh", + charDecoder_gb_18030{}, + commonChars_gb_18030, + } +} diff --git a/vendor/github.com/saintfish/chardet/recognizer.go b/vendor/github.com/saintfish/chardet/recognizer.go new file mode 100644 index 0000000..1bf8461 --- /dev/null +++ b/vendor/github.com/saintfish/chardet/recognizer.go @@ -0,0 +1,83 @@ +package chardet + +type recognizer interface { + Match(*recognizerInput) recognizerOutput +} + +type recognizerOutput Result + +type recognizerInput struct { + raw []byte + input []byte + tagStripped bool + byteStats []int + hasC1Bytes bool +} + +func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { + input, stripped := mayStripInput(raw, stripTag) + byteStats := computeByteStats(input) + return &recognizerInput{ + raw: raw, + input: input, + tagStripped: stripped, + byteStats: byteStats, + hasC1Bytes: computeHasC1Bytes(byteStats), + } +} + +func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) { + const inputBufferSize = 8192 + out = make([]byte, 0, inputBufferSize) + var badTags, openTags int32 + var inMarkup bool = false + stripped = false + if stripTag { + stripped = true + for _, c := range raw { + if c == '<' { + if inMarkup { + badTags += 1 + } + inMarkup = true + openTags += 1 + } + if !inMarkup { + out = append(out, c) + if len(out) >= inputBufferSize { + break + } + } + if c == '>' { + inMarkup = false + } + } + } + if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) { + limit := len(raw) + if limit > inputBufferSize { + limit = inputBufferSize + } + out = make([]byte, limit) + copy(out, raw[:limit]) + stripped = false + } + return +} + +func computeByteStats(input []byte) []int { + r := make([]int, 256) + for _, c := range input { + r[c] += 1 + } + return r +} + +func computeHasC1Bytes(byteStats []int) bool { + for _, count := range byteStats[0x80 : 0x9F+1] { + if count > 0 { + return true + } + } + return false +} diff --git a/vendor/github.com/saintfish/chardet/single_byte.go b/vendor/github.com/saintfish/chardet/single_byte.go new file mode 100644 index 0000000..efe41c9 --- /dev/null +++ b/vendor/github.com/saintfish/chardet/single_byte.go @@ -0,0 +1,882 @@ +package chardet + +// Recognizer for single byte charset family +type recognizerSingleByte struct { + charset string + hasC1ByteCharset string + language string + charMap *[256]byte + ngram *[64]uint32 +} + +func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { + var charset string = r.charset + if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 { + charset = r.hasC1ByteCharset + } + return recognizerOutput{ + Charset: charset, + Language: r.language, + Confidence: r.parseNgram(input.input), + } +} + +type ngramState struct { + ngram uint32 + ignoreSpace bool + ngramCount, ngramHit uint32 + table *[64]uint32 +} + +func newNgramState(table *[64]uint32) *ngramState { + return &ngramState{ + ngram: 0, + ignoreSpace: false, + ngramCount: 0, + ngramHit: 0, + table: table, + } +} + +func (s *ngramState) AddByte(b byte) { + const ngramMask = 0xFFFFFF + if !(b == 0x20 && s.ignoreSpace) { + s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask + s.ignoreSpace = (s.ngram == 0x20) + s.ngramCount++ + if s.lookup() { + s.ngramHit++ + } + } + s.ignoreSpace = (b == 0x20) +} + +func (s *ngramState) HitRate() float32 { + if s.ngramCount == 0 { + return 0 + } + return float32(s.ngramHit) / float32(s.ngramCount) +} + +func (s *ngramState) lookup() bool { + var index int + if s.table[index+32] <= s.ngram { + index += 32 + } + if s.table[index+16] <= s.ngram { + index += 16 + } + if s.table[index+8] <= s.ngram { + index += 8 + } + if s.table[index+4] <= s.ngram { + index += 4 + } + if s.table[index+2] <= s.ngram { + index += 2 + } + if s.table[index+1] <= s.ngram { + index += 1 + } + if s.table[index] > s.ngram { + index -= 1 + } + if index < 0 || s.table[index] != s.ngram { + return false + } + return true +} + +func (r *recognizerSingleByte) parseNgram(input []byte) int { + state := newNgramState(r.ngram) + for _, inChar := range input { + c := r.charMap[inChar] + if c != 0 { + state.AddByte(c) + } + } + state.AddByte(0x20) + rate := state.HitRate() + if rate > 0.33 { + return 98 + } + return int(rate * 300) +} + +var charMap_8859_1 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_8859_1_en = [64]uint32{ + 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, + 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, + 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, + 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, +} + +var ngrams_8859_1_da = [64]uint32{ + 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, + 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, + 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, + 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, +} + +var ngrams_8859_1_de = [64]uint32{ + 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, + 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, + 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, + 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, +} + +var ngrams_8859_1_es = [64]uint32{ + 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, + 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, + 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, +} + +var ngrams_8859_1_fr = [64]uint32{ + 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, + 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, + 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, + 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, +} + +var ngrams_8859_1_it = [64]uint32{ + 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, + 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, + 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, + 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, +} + +var ngrams_8859_1_nl = [64]uint32{ + 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, + 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, + 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, + 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, +} + +var ngrams_8859_1_no = [64]uint32{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, + 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, + 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, +} + +var ngrams_8859_1_pt = [64]uint32{ + 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, + 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, + 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, + 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, +} + +var ngrams_8859_1_sv = [64]uint32{ + 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, + 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, + 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, + 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, +} + +func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-1", + hasC1ByteCharset: "windows-1252", + language: language, + charMap: &charMap_8859_1, + ngram: ngram, + } +} + +func newRecognizer_8859_1_en() *recognizerSingleByte { + return newRecognizer_8859_1("en", &ngrams_8859_1_en) +} +func newRecognizer_8859_1_da() *recognizerSingleByte { + return newRecognizer_8859_1("da", &ngrams_8859_1_da) +} +func newRecognizer_8859_1_de() *recognizerSingleByte { + return newRecognizer_8859_1("de", &ngrams_8859_1_de) +} +func newRecognizer_8859_1_es() *recognizerSingleByte { + return newRecognizer_8859_1("es", &ngrams_8859_1_es) +} +func newRecognizer_8859_1_fr() *recognizerSingleByte { + return newRecognizer_8859_1("fr", &ngrams_8859_1_fr) +} +func newRecognizer_8859_1_it() *recognizerSingleByte { + return newRecognizer_8859_1("it", &ngrams_8859_1_it) +} +func newRecognizer_8859_1_nl() *recognizerSingleByte { + return newRecognizer_8859_1("nl", &ngrams_8859_1_nl) +} +func newRecognizer_8859_1_no() *recognizerSingleByte { + return newRecognizer_8859_1("no", &ngrams_8859_1_no) +} +func newRecognizer_8859_1_pt() *recognizerSingleByte { + return newRecognizer_8859_1("pt", &ngrams_8859_1_pt) +} +func newRecognizer_8859_1_sv() *recognizerSingleByte { + return newRecognizer_8859_1("sv", &ngrams_8859_1_sv) +} + +var charMap_8859_2 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, + 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, + 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, + 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, +} + +var ngrams_8859_2_cs = [64]uint32{ + 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, + 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, + 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, + 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, +} + +var ngrams_8859_2_hu = [64]uint32{ + 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, + 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, + 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, + 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, +} + +var ngrams_8859_2_pl = [64]uint32{ + 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, + 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, + 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, + 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, +} + +var ngrams_8859_2_ro = [64]uint32{ + 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, + 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, + 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, + 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, +} + +func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-2", + hasC1ByteCharset: "windows-1250", + language: language, + charMap: &charMap_8859_2, + ngram: ngram, + } +} + +func newRecognizer_8859_2_cs() *recognizerSingleByte { + return newRecognizer_8859_1("cs", &ngrams_8859_2_cs) +} +func newRecognizer_8859_2_hu() *recognizerSingleByte { + return newRecognizer_8859_1("hu", &ngrams_8859_2_hu) +} +func newRecognizer_8859_2_pl() *recognizerSingleByte { + return newRecognizer_8859_1("pl", &ngrams_8859_2_pl) +} +func newRecognizer_8859_2_ro() *recognizerSingleByte { + return newRecognizer_8859_1("ro", &ngrams_8859_2_ro) +} + +var charMap_8859_5 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, +} + +var ngrams_8859_5_ru = [64]uint32{ + 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, + 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, + 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, + 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, +} + +func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-5", + language: language, + charMap: &charMap_8859_5, + ngram: ngram, + } +} + +func newRecognizer_8859_5_ru() *recognizerSingleByte { + return newRecognizer_8859_5("ru", &ngrams_8859_5_ru) +} + +var charMap_8859_6 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, +} + +var ngrams_8859_6_ar = [64]uint32{ + 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, + 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, + 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, + 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, +} + +func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-6", + language: language, + charMap: &charMap_8859_6, + ngram: ngram, + } +} + +func newRecognizer_8859_6_ar() *recognizerSingleByte { + return newRecognizer_8859_6("ar", &ngrams_8859_6_ar) +} + +var charMap_8859_7 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, + 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, + 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, +} + +var ngrams_8859_7_el = [64]uint32{ + 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, + 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, + 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, + 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, +} + +func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-7", + hasC1ByteCharset: "windows-1253", + language: language, + charMap: &charMap_8859_7, + ngram: ngram, + } +} + +func newRecognizer_8859_7_el() *recognizerSingleByte { + return newRecognizer_8859_7("el", &ngrams_8859_7_el) +} + +var charMap_8859_8 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, +} + +var ngrams_8859_8_I_he = [64]uint32{ + 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, + 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, + 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, + 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, +} + +var ngrams_8859_8_he = [64]uint32{ + 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, + 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, + 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, + 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, +} + +func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-8", + hasC1ByteCharset: "windows-1255", + language: language, + charMap: &charMap_8859_8, + ngram: ngram, + } +} + +func newRecognizer_8859_8_I_he() *recognizerSingleByte { + r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he) + r.charset = "ISO-8859-8-I" + return r +} + +func newRecognizer_8859_8_he() *recognizerSingleByte { + return newRecognizer_8859_8("he", &ngrams_8859_8_he) +} + +var charMap_8859_9 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_8859_9_tr = [64]uint32{ + 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, + 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, + 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, + 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, +} + +func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "ISO-8859-9", + hasC1ByteCharset: "windows-1254", + language: language, + charMap: &charMap_8859_9, + ngram: ngram, + } +} + +func newRecognizer_8859_9_tr() *recognizerSingleByte { + return newRecognizer_8859_9("tr", &ngrams_8859_9_tr) +} + +var charMap_windows_1256 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, + 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, + 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, + 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, +} + +var ngrams_windows_1256 = [64]uint32{ + 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, + 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, + 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, + 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, +} + +func newRecognizer_windows_1256() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "windows-1256", + language: "ar", + charMap: &charMap_windows_1256, + ngram: &ngrams_windows_1256, + } +} + +var charMap_windows_1251 = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, + 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, + 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, + 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, + 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, + 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, +} + +var ngrams_windows_1251 = [64]uint32{ + 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, + 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, + 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, + 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, +} + +func newRecognizer_windows_1251() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "windows-1251", + language: "ar", + charMap: &charMap_windows_1251, + ngram: &ngrams_windows_1251, + } +} + +var charMap_KOI8_R = [256]byte{ + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, +} + +var ngrams_KOI8_R = [64]uint32{ + 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, + 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, + 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, + 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, +} + +func newRecognizer_KOI8_R() *recognizerSingleByte { + return &recognizerSingleByte{ + charset: "KOI8-R", + language: "ru", + charMap: &charMap_KOI8_R, + ngram: &ngrams_KOI8_R, + } +} + +var charMap_IBM424_he = [256]byte{ + /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ + /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, + /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, +} + +var ngrams_IBM424_he_rtl = [64]uint32{ + 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, + 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, + 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, + 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, +} + +var ngrams_IBM424_he_ltr = [64]uint32{ + 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, + 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, + 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, + 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, +} + +func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: charset, + language: "he", + charMap: &charMap_IBM424_he, + ngram: ngram, + } +} + +func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { + return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) +} + +func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { + return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) +} + +var charMap_IBM420_ar = [256]byte{ + /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ + /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, + /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, + /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, +} + +var ngrams_IBM420_ar_rtl = [64]uint32{ + 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, + 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, + 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, + 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, +} + +var ngrams_IBM420_ar_ltr = [64]uint32{ + 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, + 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, + 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, + 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156, +} + +func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { + return &recognizerSingleByte{ + charset: charset, + language: "ar", + charMap: &charMap_IBM420_ar, + ngram: ngram, + } +} + +func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { + return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) +} + +func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { + return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) +} diff --git a/vendor/github.com/saintfish/chardet/unicode.go b/vendor/github.com/saintfish/chardet/unicode.go new file mode 100644 index 0000000..6f9fa9e --- /dev/null +++ b/vendor/github.com/saintfish/chardet/unicode.go @@ -0,0 +1,103 @@ +package chardet + +import ( + "bytes" +) + +var ( + utf16beBom = []byte{0xFE, 0xFF} + utf16leBom = []byte{0xFF, 0xFE} + utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} + utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} +) + +type recognizerUtf16be struct { +} + +func newRecognizer_utf16be() *recognizerUtf16be { + return &recognizerUtf16be{} +} + +func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-16BE", + } + if bytes.HasPrefix(input.raw, utf16beBom) { + output.Confidence = 100 + } + return +} + +type recognizerUtf16le struct { +} + +func newRecognizer_utf16le() *recognizerUtf16le { + return &recognizerUtf16le{} +} + +func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-16LE", + } + if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) { + output.Confidence = 100 + } + return +} + +type recognizerUtf32 struct { + name string + bom []byte + decodeChar func(input []byte) uint32 +} + +func decodeUtf32be(input []byte) uint32 { + return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3]) +} + +func decodeUtf32le(input []byte) uint32 { + return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0]) +} + +func newRecognizer_utf32be() *recognizerUtf32 { + return &recognizerUtf32{ + "UTF-32BE", + utf32beBom, + decodeUtf32be, + } +} + +func newRecognizer_utf32le() *recognizerUtf32 { + return &recognizerUtf32{ + "UTF-32LE", + utf32leBom, + decodeUtf32le, + } +} + +func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: r.name, + } + hasBom := bytes.HasPrefix(input.raw, r.bom) + var numValid, numInvalid uint32 + for b := input.raw; len(b) >= 4; b = b[4:] { + if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) { + numInvalid++ + } else { + numValid++ + } + } + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} diff --git a/vendor/github.com/saintfish/chardet/utf8.go b/vendor/github.com/saintfish/chardet/utf8.go new file mode 100644 index 0000000..ae036ad --- /dev/null +++ b/vendor/github.com/saintfish/chardet/utf8.go @@ -0,0 +1,71 @@ +package chardet + +import ( + "bytes" +) + +var utf8Bom = []byte{0xEF, 0xBB, 0xBF} + +type recognizerUtf8 struct { +} + +func newRecognizer_utf8() *recognizerUtf8 { + return &recognizerUtf8{} +} + +func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-8", + } + hasBom := bytes.HasPrefix(input.raw, utf8Bom) + inputLen := len(input.raw) + var numValid, numInvalid uint32 + var trailBytes uint8 + for i := 0; i < inputLen; i++ { + c := input.raw[i] + if c&0x80 == 0 { + continue + } + if c&0xE0 == 0xC0 { + trailBytes = 1 + } else if c&0xF0 == 0xE0 { + trailBytes = 2 + } else if c&0xF8 == 0xF0 { + trailBytes = 3 + } else { + numInvalid++ + if numInvalid > 5 { + break + } + trailBytes = 0 + } + + for i++; i < inputLen; i++ { + c = input.raw[i] + if c&0xC0 != 0x80 { + numInvalid++ + break + } + if trailBytes--; trailBytes == 0 { + numValid++ + break + } + } + } + + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid == 0 && numInvalid == 0 { + // Plain ASCII + output.Confidence = 10 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} diff --git a/vendor/github.com/schollz/progressbar/v3/.gitignore b/vendor/github.com/schollz/progressbar/v3/.gitignore new file mode 100644 index 0000000..35c3bde --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/.gitignore @@ -0,0 +1,18 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +.idea/ +*.tar.gz diff --git a/vendor/github.com/schollz/progressbar/v3/.travis.yml b/vendor/github.com/schollz/progressbar/v3/.travis.yml new file mode 100644 index 0000000..1d23387 --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/.travis.yml @@ -0,0 +1,6 @@ +language: go + +go: + - tip + +script: go test -v . diff --git a/vendor/github.com/schollz/progressbar/v3/LICENSE b/vendor/github.com/schollz/progressbar/v3/LICENSE new file mode 100644 index 0000000..0ca9765 --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Zack + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/schollz/progressbar/v3/README.md b/vendor/github.com/schollz/progressbar/v3/README.md new file mode 100644 index 0000000..7fe24ad --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/README.md @@ -0,0 +1,121 @@ +# progressbar + +[](https://travis-ci.org/schollz/progressbar) +[](https://goreportcard.com/report/github.com/schollz/progressbar) +[](https://gocover.io/github.com/schollz/progressbar) +[](https://godoc.org/github.com/schollz/progressbar/v3) + +A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6). + + +## Install + +``` +go get -u github.com/schollz/progressbar/v3 +``` + +## Usage + +### Basic usage + +```golang +bar := progressbar.Default(100) +for i := 0; i < 100; i++ { + bar.Add(1) + time.Sleep(40 * time.Millisecond) +} +``` + +which looks like: + + + + +### I/O operations + +The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`. + +```golang +req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil) +resp, _ := http.DefaultClient.Do(req) +defer resp.Body.Close() + +f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644) +defer f.Close() + +bar := progressbar.DefaultBytes( + resp.ContentLength, + "downloading", +) +io.Copy(io.MultiWriter(f, bar), resp.Body) +``` + +which looks like: + + + + +### Progress bar with unknown length + +A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`. + +which looks like: + + + + +### Customization + +There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option). + +```golang +bar := progressbar.NewOptions(1000, + progressbar.OptionSetWriter(ansi.NewAnsiStdout()), + progressbar.OptionEnableColorCodes(true), + progressbar.OptionShowBytes(true), + progressbar.OptionSetWidth(15), + progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."), + progressbar.OptionSetTheme(progressbar.Theme{ + Saucer: "[green]=[reset]", + SaucerHead: "[green]>[reset]", + SaucerPadding: " ", + BarStart: "[", + BarEnd: "]", + })) +for i := 0; i < 1000; i++ { + bar.Add(1) + time.Sleep(5 * time.Millisecond) +} +``` + +which looks like: + + + + +## Contributing + +Pull requests are welcome. Feel free to... + +- Revise documentation +- Add new features +- Fix bugs +- Suggest improvements + +## Thanks + +Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0! + +Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support! + +Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features! + +Thanks [@tehstun](https://github.com/tehstun) for some great PRs! + +Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3! + +Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners. + +## License + +MIT diff --git a/vendor/github.com/schollz/progressbar/v3/go.mod b/vendor/github.com/schollz/progressbar/v3/go.mod new file mode 100644 index 0000000..53b8460 --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/go.mod @@ -0,0 +1,15 @@ +module github.com/schollz/progressbar/v3 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 + github.com/mattn/go-isatty v0.0.13 // indirect + github.com/mattn/go-runewidth v0.0.13 + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db + github.com/stretchr/testify v1.3.0 + golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e + golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect + golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect +) + +go 1.13 diff --git a/vendor/github.com/schollz/progressbar/v3/go.sum b/vendor/github.com/schollz/progressbar/v3/go.sum new file mode 100644 index 0000000..76f0dfa --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/go.sum @@ -0,0 +1,31 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= +github.com/mattn/go-isatty v0.0.13 h1:qdl+GuBjcsKKDco5BsxPJlId98mSWNKqYA+Co0SC1yA= +github.com/mattn/go-isatty v0.0.13/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e h1:gsTQYXdTw2Gq7RBsWvlQ91b+aEQ6bXFUngBGuR8sPpI= +golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 h1:RqytpXGR1iVNX7psjB3ff8y7sNFinVFvkx1c8SjBkio= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b h1:9zKuko04nR4gjZ4+DNjHqRlAJqbJETHwiNKDqTfOjfE= +golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/vendor/github.com/schollz/progressbar/v3/progressbar.go b/vendor/github.com/schollz/progressbar/v3/progressbar.go new file mode 100644 index 0000000..4de3c50 --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/progressbar.go @@ -0,0 +1,927 @@ +package progressbar + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "regexp" + "strings" + "sync" + "time" + + "github.com/mattn/go-runewidth" + "github.com/mitchellh/colorstring" + "golang.org/x/crypto/ssh/terminal" +) + +// ProgressBar is a thread-safe, simple +// progress bar +type ProgressBar struct { + state state + config config + lock sync.Mutex +} + +// State is the basic properties of the bar +type State struct { + CurrentPercent float64 + CurrentBytes float64 + SecondsSince float64 + SecondsLeft float64 + KBsPerSecond float64 +} + +type state struct { + currentNum int64 + currentPercent int + lastPercent int + currentSaucerSize int + + lastShown time.Time + startTime time.Time + + counterTime time.Time + counterNumSinceLast int64 + counterLastTenRates []float64 + + maxLineWidth int + currentBytes float64 + finished bool + + rendered string +} + +type config struct { + max int64 // max number of the counter + maxHumanized string + maxHumanizedSuffix string + width int + writer io.Writer + theme Theme + renderWithBlankState bool + description string + iterationString string + ignoreLength bool // ignoreLength if max bytes not known + + // whether the output is expected to contain color codes + colorCodes bool + + // show rate of change in kB/sec or MB/sec + showBytes bool + // show the iterations per second + showIterationsPerSecond bool + showIterationsCount bool + + // whether the progress bar should attempt to predict the finishing + // time of the progress based on the start time and the average + // number of seconds between increments. + predictTime bool + + // minimum time to wait in between updates + throttleDuration time.Duration + + // clear bar once finished + clearOnFinish bool + + // spinnerType should be a number between 0-75 + spinnerType int + + // fullWidth specifies whether to measure and set the bar to a specific width + fullWidth bool + + // invisible doesn't render the bar at all, useful for debugging + invisible bool + + onCompletion func() + + // whether the render function should make use of ANSI codes to reduce console I/O + useANSICodes bool +} + +// Theme defines the elements of the bar +type Theme struct { + Saucer string + SaucerHead string + SaucerPadding string + BarStart string + BarEnd string +} + +// Option is the type all options need to adhere to +type Option func(p *ProgressBar) + +// OptionSetWidth sets the width of the bar +func OptionSetWidth(s int) Option { + return func(p *ProgressBar) { + p.config.width = s + } +} + +// OptionSpinnerType sets the type of spinner used for indeterminate bars +func OptionSpinnerType(spinnerType int) Option { + return func(p *ProgressBar) { + p.config.spinnerType = spinnerType + } +} + +// OptionSetTheme sets the elements the bar is constructed of +func OptionSetTheme(t Theme) Option { + return func(p *ProgressBar) { + p.config.theme = t + } +} + +// OptionSetVisibility sets the visibility +func OptionSetVisibility(visibility bool) Option { + return func(p *ProgressBar) { + p.config.invisible = !visibility + } +} + +// OptionFullWidth sets the bar to be full width +func OptionFullWidth() Option { + return func(p *ProgressBar) { + p.config.fullWidth = true + } +} + +// OptionSetWriter sets the output writer (defaults to os.StdOut) +func OptionSetWriter(w io.Writer) Option { + return func(p *ProgressBar) { + p.config.writer = w + } +} + +// OptionSetRenderBlankState sets whether or not to render a 0% bar on construction +func OptionSetRenderBlankState(r bool) Option { + return func(p *ProgressBar) { + p.config.renderWithBlankState = r + } +} + +// OptionSetDescription sets the description of the bar to render in front of it +func OptionSetDescription(description string) Option { + return func(p *ProgressBar) { + p.config.description = description + } +} + +// OptionEnableColorCodes enables or disables support for color codes +// using mitchellh/colorstring +func OptionEnableColorCodes(colorCodes bool) Option { + return func(p *ProgressBar) { + p.config.colorCodes = colorCodes + } +} + +// OptionSetPredictTime will also attempt to predict the time remaining. +func OptionSetPredictTime(predictTime bool) Option { + return func(p *ProgressBar) { + p.config.predictTime = predictTime + } +} + +// OptionShowCount will also print current count out of total +func OptionShowCount() Option { + return func(p *ProgressBar) { + p.config.showIterationsCount = true + } +} + +// OptionShowIts will also print the iterations/second +func OptionShowIts() Option { + return func(p *ProgressBar) { + p.config.showIterationsPerSecond = true + } +} + +// OptionSetItsString sets what's displayed for interations a second. The default is "it" which would display: "it/s" +func OptionSetItsString(iterationString string) Option { + return func(p *ProgressBar) { + p.config.iterationString = iterationString + } +} + +// OptionThrottle will wait the specified duration before updating again. The default +// duration is 0 seconds. +func OptionThrottle(duration time.Duration) Option { + return func(p *ProgressBar) { + p.config.throttleDuration = duration + } +} + +// OptionClearOnFinish will clear the bar once its finished +func OptionClearOnFinish() Option { + return func(p *ProgressBar) { + p.config.clearOnFinish = true + } +} + +// OptionOnCompletion will invoke cmpl function once its finished +func OptionOnCompletion(cmpl func()) Option { + return func(p *ProgressBar) { + p.config.onCompletion = cmpl + } +} + +// OptionShowBytes will update the progress bar +// configuration settings to display/hide kBytes/Sec +func OptionShowBytes(val bool) Option { + return func(p *ProgressBar) { + p.config.showBytes = val + } +} + +// OptionUseANSICodes will use more optimized terminal i/o. +// +// Only useful in environments with support for ANSI escape sequences. +func OptionUseANSICodes(val bool) Option { + return func(p *ProgressBar) { + p.config.useANSICodes = val + } +} + +var defaultTheme = Theme{Saucer: "█", SaucerPadding: " ", BarStart: "|", BarEnd: "|"} + +// NewOptions constructs a new instance of ProgressBar, with any options you specify +func NewOptions(max int, options ...Option) *ProgressBar { + return NewOptions64(int64(max), options...) +} + +// NewOptions64 constructs a new instance of ProgressBar, with any options you specify +func NewOptions64(max int64, options ...Option) *ProgressBar { + b := ProgressBar{ + state: getBasicState(), + config: config{ + writer: os.Stdout, + theme: defaultTheme, + iterationString: "it", + width: 40, + max: max, + throttleDuration: 0 * time.Nanosecond, + predictTime: true, + spinnerType: 9, + invisible: false, + }, + } + + for _, o := range options { + o(&b) + } + + if b.config.spinnerType < 0 || b.config.spinnerType > 75 { + panic("invalid spinner type, must be between 0 and 75") + } + + // ignoreLength if max bytes not known + if b.config.max == -1 { + b.config.ignoreLength = true + b.config.max = int64(b.config.width) + b.config.predictTime = false + } + + b.config.maxHumanized, b.config.maxHumanizedSuffix = humanizeBytes(float64(b.config.max)) + + if b.config.renderWithBlankState { + b.RenderBlank() + } + + return &b +} + +func getBasicState() state { + now := time.Now() + return state{ + startTime: now, + lastShown: now, + counterTime: now, + } +} + +// New returns a new ProgressBar +// with the specified maximum +func New(max int) *ProgressBar { + return NewOptions(max) +} + +// DefaultBytes provides a progressbar to measure byte +// throughput with recommended defaults. +// Set maxBytes to -1 to use as a spinner. +func DefaultBytes(maxBytes int64, description ...string) *ProgressBar { + desc := "" + if len(description) > 0 { + desc = description[0] + } + bar := NewOptions64( + maxBytes, + OptionSetDescription(desc), + OptionSetWriter(os.Stderr), + OptionShowBytes(true), + OptionSetWidth(10), + OptionThrottle(65*time.Millisecond), + OptionShowCount(), + OptionOnCompletion(func() { + fmt.Fprint(os.Stderr, "\n") + }), + OptionSpinnerType(14), + OptionFullWidth(), + ) + bar.RenderBlank() + return bar +} + +// DefaultBytesSilent is the same as DefaultBytes, but does not output anywhere. +// String() can be used to get the output instead. +func DefaultBytesSilent(maxBytes int64, description ...string) *ProgressBar { + // Mostly the same bar as DefaultBytes + + desc := "" + if len(description) > 0 { + desc = description[0] + } + bar := NewOptions64( + maxBytes, + OptionSetDescription(desc), + OptionSetWriter(ioutil.Discard), + OptionShowBytes(true), + OptionSetWidth(10), + OptionThrottle(65*time.Millisecond), + OptionShowCount(), + OptionSpinnerType(14), + OptionFullWidth(), + ) + bar.RenderBlank() + return bar +} + +// Default provides a progressbar with recommended defaults. +// Set max to -1 to use as a spinner. +func Default(max int64, description ...string) *ProgressBar { + desc := "" + if len(description) > 0 { + desc = description[0] + } + bar := NewOptions64( + max, + OptionSetDescription(desc), + OptionSetWriter(os.Stderr), + OptionSetWidth(10), + OptionThrottle(65*time.Millisecond), + OptionShowCount(), + OptionShowIts(), + OptionOnCompletion(func() { + fmt.Fprint(os.Stderr, "\n") + }), + OptionSpinnerType(14), + OptionFullWidth(), + ) + bar.RenderBlank() + return bar +} + +// DefaultSilent is the same as Default, but does not output anywhere. +// String() can be used to get the output instead. +func DefaultSilent(max int64, description ...string) *ProgressBar { + // Mostly the same bar as Default + + desc := "" + if len(description) > 0 { + desc = description[0] + } + bar := NewOptions64( + max, + OptionSetDescription(desc), + OptionSetWriter(ioutil.Discard), + OptionSetWidth(10), + OptionThrottle(65*time.Millisecond), + OptionShowCount(), + OptionShowIts(), + OptionSpinnerType(14), + OptionFullWidth(), + ) + bar.RenderBlank() + return bar +} + +// String returns the current rendered version of the progress bar. +// It will never return an empty string while the progress bar is running. +func (p *ProgressBar) String() string { + return p.state.rendered +} + +// RenderBlank renders the current bar state, you can use this to render a 0% state +func (p *ProgressBar) RenderBlank() error { + if p.config.invisible { + return nil + } + return p.render() +} + +// Reset will reset the clock that is used +// to calculate current time and the time left. +func (p *ProgressBar) Reset() { + p.lock.Lock() + defer p.lock.Unlock() + + p.state = getBasicState() +} + +// Finish will fill the bar to full +func (p *ProgressBar) Finish() error { + p.lock.Lock() + p.state.currentNum = p.config.max + p.lock.Unlock() + return p.Add(0) +} + +// Add will add the specified amount to the progressbar +func (p *ProgressBar) Add(num int) error { + return p.Add64(int64(num)) +} + +// Set wil set the bar to a current number +func (p *ProgressBar) Set(num int) error { + return p.Set64(int64(num)) +} + +// Set64 wil set the bar to a current number +func (p *ProgressBar) Set64(num int64) error { + p.lock.Lock() + toAdd := num - int64(p.state.currentBytes) + p.lock.Unlock() + return p.Add64(toAdd) +} + +// Add64 will add the specified amount to the progressbar +func (p *ProgressBar) Add64(num int64) error { + if p.config.invisible { + return nil + } + p.lock.Lock() + defer p.lock.Unlock() + + if p.config.max == 0 { + return errors.New("max must be greater than 0") + } + + if p.state.currentNum < p.config.max { + if p.config.ignoreLength { + p.state.currentNum = (p.state.currentNum + num) % p.config.max + } else { + p.state.currentNum += num + } + } + + p.state.currentBytes += float64(num) + + // reset the countdown timer every second to take rolling average + p.state.counterNumSinceLast += num + if time.Since(p.state.counterTime).Seconds() > 0.5 { + p.state.counterLastTenRates = append(p.state.counterLastTenRates, float64(p.state.counterNumSinceLast)/time.Since(p.state.counterTime).Seconds()) + if len(p.state.counterLastTenRates) > 10 { + p.state.counterLastTenRates = p.state.counterLastTenRates[1:] + } + p.state.counterTime = time.Now() + p.state.counterNumSinceLast = 0 + } + + percent := float64(p.state.currentNum) / float64(p.config.max) + p.state.currentSaucerSize = int(percent * float64(p.config.width)) + p.state.currentPercent = int(percent * 100) + updateBar := p.state.currentPercent != p.state.lastPercent && p.state.currentPercent > 0 + + p.state.lastPercent = p.state.currentPercent + if p.state.currentNum > p.config.max { + return errors.New("current number exceeds max") + } + + // always update if show bytes/second or its/second + if updateBar || p.config.showIterationsPerSecond || p.config.showIterationsCount { + return p.render() + } + + return nil +} + +// Clear erases the progress bar from the current line +func (p *ProgressBar) Clear() error { + return clearProgressBar(p.config, p.state) +} + +// Describe will change the description shown before the progress, which +// can be changed on the fly (as for a slow running process). +func (p *ProgressBar) Describe(description string) { + p.config.description = description +} + +// New64 returns a new ProgressBar +// with the specified maximum +func New64(max int64) *ProgressBar { + return NewOptions64(max) +} + +// GetMax returns the max of a bar +func (p *ProgressBar) GetMax() int { + return int(p.config.max) +} + +// GetMax64 returns the current max +func (p *ProgressBar) GetMax64() int64 { + return p.config.max +} + +// ChangeMax takes in a int +// and changes the max value +// of the progress bar +func (p *ProgressBar) ChangeMax(newMax int) { + p.ChangeMax64(int64(newMax)) +} + +// ChangeMax64 is basically +// the same as ChangeMax, +// but takes in a int64 +// to avoid casting +func (p *ProgressBar) ChangeMax64(newMax int64) { + p.config.max = newMax + p.Add(0) // re-render +} + +// IsFinished returns true if progreess bar is completed +func (p *ProgressBar) IsFinished() bool { + return p.state.finished +} + +// render renders the progress bar, updating the maximum +// rendered line width. this function is not thread-safe, +// so it must be called with an acquired lock. +func (p *ProgressBar) render() error { + // make sure that the rendering is not happening too quickly + // but always show if the currentNum reaches the max + if time.Since(p.state.lastShown).Nanoseconds() < p.config.throttleDuration.Nanoseconds() && + p.state.currentNum < p.config.max { + return nil + } + + if !p.config.useANSICodes { + // first, clear the existing progress bar + err := clearProgressBar(p.config, p.state) + if err != nil { + return err + } + } + + // check if the progress bar is finished + if !p.state.finished && p.state.currentNum >= p.config.max { + p.state.finished = true + if !p.config.clearOnFinish { + renderProgressBar(p.config, &p.state) + } + + if p.config.onCompletion != nil { + p.config.onCompletion() + } + } + if p.state.finished { + // when using ANSI codes we don't pre-clean the current line + if p.config.useANSICodes { + err := clearProgressBar(p.config, p.state) + if err != nil { + return err + } + } + return nil + } + + // then, re-render the current progress bar + w, err := renderProgressBar(p.config, &p.state) + if err != nil { + return err + } + + if w > p.state.maxLineWidth { + p.state.maxLineWidth = w + } + + p.state.lastShown = time.Now() + + return nil +} + +// State returns the current state +func (p *ProgressBar) State() State { + p.lock.Lock() + defer p.lock.Unlock() + s := State{} + s.CurrentPercent = float64(p.state.currentNum) / float64(p.config.max) + s.CurrentBytes = p.state.currentBytes + s.SecondsSince = time.Since(p.state.startTime).Seconds() + if p.state.currentNum > 0 { + s.SecondsLeft = s.SecondsSince / float64(p.state.currentNum) * (float64(p.config.max) - float64(p.state.currentNum)) + } + s.KBsPerSecond = float64(p.state.currentBytes) / 1024.0 / s.SecondsSince + return s +} + +// regex matching ansi escape codes +var ansiRegex = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`) + +func getStringWidth(c config, str string, colorize bool) int { + if c.colorCodes { + // convert any color codes in the progress bar into the respective ANSI codes + str = colorstring.Color(str) + } + + // the width of the string, if printed to the console + // does not include the carriage return character + cleanString := strings.Replace(str, "\r", "", -1) + + if c.colorCodes { + // the ANSI codes for the colors do not take up space in the console output, + // so they do not count towards the output string width + cleanString = ansiRegex.ReplaceAllString(cleanString, "") + } + + // get the amount of runes in the string instead of the + // character count of the string, as some runes span multiple characters. + // see https://stackoverflow.com/a/12668840/2733724 + stringWidth := runewidth.StringWidth(cleanString) + return stringWidth +} + +func renderProgressBar(c config, s *state) (int, error) { + leftBrac := "" + rightBrac := "" + saucer := "" + bytesString := "" + str := "" + + averageRate := average(s.counterLastTenRates) + if len(s.counterLastTenRates) == 0 || s.finished { + // if no average samples, or if finished, + // then average rate should be the total rate + averageRate = s.currentBytes / time.Since(s.startTime).Seconds() + } + + // show iteration count in "current/total" iterations format + if c.showIterationsCount { + if bytesString == "" { + bytesString += "(" + } else { + bytesString += ", " + } + if !c.ignoreLength { + if c.showBytes { + currentHumanize, currentSuffix := humanizeBytes(s.currentBytes) + if currentSuffix == c.maxHumanizedSuffix { + bytesString += fmt.Sprintf("%s/%s%s", currentHumanize, c.maxHumanized, c.maxHumanizedSuffix) + } else { + bytesString += fmt.Sprintf("%s%s/%s%s", currentHumanize, currentSuffix, c.maxHumanized, c.maxHumanizedSuffix) + + } + } else { + bytesString += fmt.Sprintf("%.0f/%d", s.currentBytes, c.max) + } + } else { + if c.showBytes { + currentHumanize, currentSuffix := humanizeBytes(s.currentBytes) + bytesString += fmt.Sprintf("%s%s", currentHumanize, currentSuffix) + } else { + bytesString += fmt.Sprintf("%.0f/%s", s.currentBytes, "-") + } + } + } + + // show rolling average rate in kB/sec or MB/sec + if c.showBytes { + if bytesString == "" { + bytesString += "(" + } else { + bytesString += ", " + } + kbPerSecond := averageRate / 1024.0 + if kbPerSecond > 1024.0 { + bytesString += fmt.Sprintf("%0.3f MB/s", kbPerSecond/1024.0) + } else if kbPerSecond > 0 { + bytesString += fmt.Sprintf("%0.3f kB/s", kbPerSecond) + } + } + + // show iterations rate + if c.showIterationsPerSecond { + if bytesString == "" { + bytesString += "(" + } else { + bytesString += ", " + } + if averageRate > 1 { + bytesString += fmt.Sprintf("%0.0f %s/s", averageRate, c.iterationString) + } else { + bytesString += fmt.Sprintf("%0.0f %s/min", 60*averageRate, c.iterationString) + } + } + if bytesString != "" { + bytesString += ")" + } + + // show time prediction in "current/total" seconds format + if c.predictTime { + leftBrac = (time.Duration(time.Since(s.startTime).Seconds()) * time.Second).String() + rightBrac = (time.Duration((1/averageRate)*(float64(c.max)-float64(s.currentNum))) * time.Second).String() + } + + if c.fullWidth && !c.ignoreLength { + width, _, err := terminal.GetSize(int(os.Stdout.Fd())) + if err != nil { + width, _, err = terminal.GetSize(int(os.Stderr.Fd())) + if err != nil { + width = 80 + } + } + + c.width = width - getStringWidth(c, c.description, true) - 14 - len(bytesString) - len(leftBrac) - len(rightBrac) + s.currentSaucerSize = int(float64(s.currentPercent) / 100.0 * float64(c.width)) + } + if s.currentSaucerSize > 0 { + if c.ignoreLength { + saucer = strings.Repeat(c.theme.SaucerPadding, s.currentSaucerSize-1) + } else { + saucer = strings.Repeat(c.theme.Saucer, s.currentSaucerSize-1) + } + saucerHead := c.theme.SaucerHead + if saucerHead == "" || s.currentSaucerSize == c.width { + // use the saucer for the saucer head if it hasn't been set + // to preserve backwards compatibility + saucerHead = c.theme.Saucer + } + saucer += saucerHead + } + + /* + Progress Bar format + Description % |------ | (kb/s) (iteration count) (iteration rate) (predict time) + */ + repeatAmount := c.width - s.currentSaucerSize + if repeatAmount < 0 { + repeatAmount = 0 + } + if c.ignoreLength { + str = fmt.Sprintf("\r%s %s %s ", + spinners[c.spinnerType][int(math.Round(math.Mod(float64(time.Since(s.startTime).Milliseconds()/100), float64(len(spinners[c.spinnerType])))))], + c.description, + bytesString, + ) + } else if leftBrac == "" { + str = fmt.Sprintf("\r%s%4d%% %s%s%s%s %s ", + c.description, + s.currentPercent, + c.theme.BarStart, + saucer, + strings.Repeat(c.theme.SaucerPadding, repeatAmount), + c.theme.BarEnd, + bytesString, + ) + } else { + if s.currentPercent == 100 { + str = fmt.Sprintf("\r%s%4d%% %s%s%s%s %s", + c.description, + s.currentPercent, + c.theme.BarStart, + saucer, + strings.Repeat(c.theme.SaucerPadding, repeatAmount), + c.theme.BarEnd, + bytesString, + ) + } else { + str = fmt.Sprintf("\r%s%4d%% %s%s%s%s %s [%s:%s]", + c.description, + s.currentPercent, + c.theme.BarStart, + saucer, + strings.Repeat(c.theme.SaucerPadding, repeatAmount), + c.theme.BarEnd, + bytesString, + leftBrac, + rightBrac, + ) + } + } + + if c.colorCodes { + // convert any color codes in the progress bar into the respective ANSI codes + str = colorstring.Color(str) + } + + s.rendered = str + + return getStringWidth(c, str, false), writeString(c, str) +} + +func clearProgressBar(c config, s state) error { + if c.useANSICodes { + // write the "clear current line" ANSI escape sequence + return writeString(c, "\033[2K\r") + } + // fill the empty content + // to overwrite the progress bar and jump + // back to the beginning of the line + str := fmt.Sprintf("\r%s\r", strings.Repeat(" ", s.maxLineWidth)) + return writeString(c, str) + // the following does not show correctly if the previous line is longer than subsequent line + // return writeString(c, "\r") +} + +func writeString(c config, str string) error { + if _, err := io.WriteString(c.writer, str); err != nil { + return err + } + + if f, ok := c.writer.(*os.File); ok { + // ignore any errors in Sync(), as stdout + // can't be synced on some operating systems + // like Debian 9 (Stretch) + f.Sync() + } + + return nil +} + +// Reader is the progressbar io.Reader struct +type Reader struct { + io.Reader + bar *ProgressBar +} + +// NewReader return a new Reader with a given progress bar. +func NewReader(r io.Reader, bar *ProgressBar) Reader { + return Reader{ + Reader: r, + bar: bar, + } +} + +// Read will read the data and add the number of bytes to the progressbar +func (r *Reader) Read(p []byte) (n int, err error) { + n, err = r.Reader.Read(p) + r.bar.Add(n) + return +} + +// Close the reader when it implements io.Closer +func (r *Reader) Close() (err error) { + if closer, ok := r.Reader.(io.Closer); ok { + return closer.Close() + } + r.bar.Finish() + return +} + +// Write implement io.Writer +func (p *ProgressBar) Write(b []byte) (n int, err error) { + n = len(b) + p.Add(n) + return +} + +// Read implement io.Reader +func (p *ProgressBar) Read(b []byte) (n int, err error) { + n = len(b) + p.Add(n) + return +} + +func (p *ProgressBar) Close() (err error) { + p.Finish() + return +} + +func average(xs []float64) float64 { + total := 0.0 + for _, v := range xs { + total += v + } + return total / float64(len(xs)) +} + +func humanizeBytes(s float64) (string, string) { + sizes := []string{" B", " kB", " MB", " GB", " TB", " PB", " EB"} + base := 1024.0 + if s < 10 { + return fmt.Sprintf("%2.0f", s), "B" + } + e := math.Floor(logn(float64(s), base)) + suffix := sizes[int(e)] + val := math.Floor(float64(s)/math.Pow(base, e)*10+0.5) / 10 + f := "%.0f" + if val < 10 { + f = "%.1f" + } + + return fmt.Sprintf(f, val), suffix +} + +func logn(n, b float64) float64 { + return math.Log(n) / math.Log(b) +} diff --git a/vendor/github.com/schollz/progressbar/v3/spinners.go b/vendor/github.com/schollz/progressbar/v3/spinners.go new file mode 100644 index 0000000..c3ccd01 --- /dev/null +++ b/vendor/github.com/schollz/progressbar/v3/spinners.go @@ -0,0 +1,80 @@ +package progressbar + +var spinners = map[int][]string{ + 0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"}, + 1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"}, + 2: {"▖", "▘", "▝", "▗"}, + 3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"}, + 4: {"◢", "◣", "◤", "◥"}, + 5: {"◰", "◳", "◲", "◱"}, + 6: {"◴", "◷", "◶", "◵"}, + 7: {"◐", "◓", "◑", "◒"}, + 8: {".", "o", "O", "@", "*"}, + 9: {"|", "/", "-", "\\"}, + 10: {"◡◡", "⊙⊙", "◠◠"}, + 11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"}, + 12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"}, + 13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"}, + 14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}, + 15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}, + 16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"}, + 17: {"■", "□", "▪", "▫"}, + 18: {"←", "↑", "→", "↓"}, + 19: {"╫", "╪"}, + 20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"}, + 21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"}, + 22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"}, + 23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"}, + 24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"}, + 25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"}, + 26: {".", "..", "..."}, + 27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"}, + 28: {".", "o", "O", "°", "O", "o", "."}, + 29: {"+", "x"}, + 30: {"v", "<", "^", ">"}, + 31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"}, + 32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"}, + 33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"}, + 34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"}, + 35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"}, + 36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"}, + 37: {"ဝ", "၀"}, + 38: {"▌", "▀", "▐▄"}, + 39: {"🌍", "🌎", "🌏"}, + 40: {"◜", "◝", "◞", "◟"}, + 41: {"⬒", "⬔", "⬓", "⬕"}, + 42: {"⬖", "⬘", "⬗", "⬙"}, + 43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"}, + 44: {"♠", "♣", "♥", "♦"}, + 45: {"➞", "➟", "➠", "➡", "➠", "➟"}, + 46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "}, + 47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."}, + 48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "}, + 49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"}, + 50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"}, + 51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"}, + 52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"}, + 53: {"✶", "✸", "✹", "✺", "✹", "✷"}, + 54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"}, + 55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"}, + 56: {"¿", "?"}, + 57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"}, + 58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"}, + 59: {". ", ".. ", "...", " ..", " .", " "}, + 60: {".", "o", "O", "°", "O", "o", "."}, + 61: {"▓", "▒", "░"}, + 62: {"▌", "▀", "▐", "▄"}, + 63: {"⊶", "⊷"}, + 64: {"▪", "▫"}, + 65: {"□", "■"}, + 66: {"▮", "▯"}, + 67: {"-", "=", "≡"}, + 68: {"d", "q", "p", "b"}, + 69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"}, + 70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "}, + 71: {"☗", "☖"}, + 72: {"⧇", "⧆"}, + 73: {"◉", "◎"}, + 74: {"㊂", "㊀", "㊁"}, + 75: {"⦾", "⦿"}, +} diff --git a/vendor/github.com/temoto/robotstxt/.gitignore b/vendor/github.com/temoto/robotstxt/.gitignore new file mode 100644 index 0000000..6205f9e --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/.gitignore @@ -0,0 +1,15 @@ +*.cgo?.* +*.o +*.so +*.sublime-* +*.zip +.DS_Store +.idea/ +.tags* +_cgo_* +_gofuzz/crashers/ +_gofuzz/suppressions/ +_obj +_test +coverage.txt +robots.txt-check/robots.txt-check diff --git a/vendor/github.com/temoto/robotstxt/.golangci.yml b/vendor/github.com/temoto/robotstxt/.golangci.yml new file mode 100644 index 0000000..24e5858 --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/.golangci.yml @@ -0,0 +1,20 @@ +linters: + enable: + - goconst + - gofmt + - gosec + - maligned + - prealloc + - staticcheck + disable: + - deadcode + - structcheck + - varcheck + +linters-settings: + gofmt: + simplify: true + govet: + check-shadowing: true + maligned: + suggest-new: true diff --git a/vendor/github.com/temoto/robotstxt/.travis.yml b/vendor/github.com/temoto/robotstxt/.travis.yml new file mode 100644 index 0000000..ad90dac --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/.travis.yml @@ -0,0 +1,30 @@ +cache: + go: true + directories: + - $HOME/.cache + - $HOME/bin + - $HOME/gopath/pkg/mod +language: go +go: +- 1.11 +- 1.12 +- 1.13 +- 1.14 +- 1.x +- master +install: true +script: GO111MODULE=on go test -race + +matrix: + include: + - go: 1.x + env: task=coverage + script: GO111MODULE=on go test -race -covermode=atomic -coverprofile=coverage.txt + after_success: bash <(curl -s https://codecov.io/bash) + - go: 1.x + env: task=bench + script: GO111MODULE=on ./script/bench + - go: 1.x + install: curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s -- -b $HOME/bin v1.19.1 + env: task=clean + script: GO111MODULE=on ./script/clean diff --git a/vendor/github.com/temoto/robotstxt/LICENSE b/vendor/github.com/temoto/robotstxt/LICENSE new file mode 100644 index 0000000..c125145 --- /dev/null +++ b/vendor/github.com/temoto/robotstxt/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2010 Sergey Shepelev