mirror of
https://bitbucket.org/skobkin/point-tools-crawler.git
synced 2024-12-04 18:25:52 +00:00
Crawler now supports 2 new flags: -u (post UID to start loading pages from) and -c (number of pages to get).
This commit is contained in:
parent
d791b2377c
commit
df54db176d
|
@ -87,20 +87,16 @@ func (c *PointClient) GetRecentAllPostsPage() (Page, error) {
|
||||||
return page, nil
|
return page, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
func (c *PointClient) GetNextAllPostsPageBeforeUid(uid int) (Page, error) {
|
||||||
var nextPage Page
|
var nextPage Page
|
||||||
|
|
||||||
if false == page.HasNext || len(page.Posts) == 0 {
|
|
||||||
return nextPage, errors.New("Page must have has_next=true and some posts")
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(c.token.AuthToken) == 0 {
|
if len(c.token.AuthToken) == 0 {
|
||||||
log.Fatal("Can not get recent posts. Login first.")
|
log.Fatal("Can not get recent posts. Login first.")
|
||||||
return nextPage, errors.New("Login first")
|
return nextPage, errors.New("Login first")
|
||||||
}
|
}
|
||||||
|
|
||||||
data := url.Values{}
|
data := url.Values{}
|
||||||
data.Set("before", strconv.Itoa(page.Posts[len(page.Posts)-1].Uid))
|
data.Set("before", strconv.Itoa(uid))
|
||||||
|
|
||||||
headers := map[string]string{
|
headers := map[string]string{
|
||||||
"Authorization": c.token.AuthToken,
|
"Authorization": c.token.AuthToken,
|
||||||
|
@ -127,3 +123,20 @@ func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
||||||
|
|
||||||
return nextPage, nil
|
return nextPage, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
||||||
|
var nextPage Page
|
||||||
|
|
||||||
|
if len(page.Posts) == 0 {
|
||||||
|
return nextPage, errors.New("Page must have has_next=true and some posts")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(c.token.AuthToken) == 0 {
|
||||||
|
log.Fatal("Can not get recent posts. Login first.")
|
||||||
|
return nextPage, errors.New("Login first")
|
||||||
|
}
|
||||||
|
|
||||||
|
nextPage, err := c.GetNextAllPostsPageBeforeUid(page.Posts[len(page.Posts)-1].Uid)
|
||||||
|
|
||||||
|
return nextPage, err
|
||||||
|
}
|
||||||
|
|
|
@ -14,13 +14,17 @@ func main() {
|
||||||
var pointApiUrl, pointToolsApiUrl string;
|
var pointApiUrl, pointToolsApiUrl string;
|
||||||
var pointLogin, pointPassword, pointToolsToken string
|
var pointLogin, pointPassword, pointToolsToken string
|
||||||
var forceContinue bool
|
var forceContinue bool
|
||||||
|
var fromUid, limit int
|
||||||
|
|
||||||
|
// Todo refactor the CLI options to new library
|
||||||
flag.StringVar(&pointLogin, "l", "", "Account login")
|
flag.StringVar(&pointLogin, "l", "", "Account login")
|
||||||
flag.StringVar(&pointPassword, "p", "", "Account password")
|
flag.StringVar(&pointPassword, "p", "", "Account password")
|
||||||
flag.StringVar(&pointToolsToken, "t", "", "Point Tools crawler API token")
|
flag.StringVar(&pointToolsToken, "t", "", "Point Tools crawler API token")
|
||||||
flag.StringVar(&pointApiUrl, "s", "https://point.im/api/", "Point.im API url")
|
flag.StringVar(&pointApiUrl, "s", "https://point.im/api/", "Point.im API url")
|
||||||
flag.StringVar(&pointToolsApiUrl, "g", "https://point.skobk.in/api/crawler/", "Point Tools API url")
|
flag.StringVar(&pointToolsApiUrl, "g", "https://point.skobk.in/api/crawler/", "Point Tools API url")
|
||||||
flag.BoolVar(&forceContinue, "f", false, "Force continue reading /all despite of server refusal")
|
flag.BoolVar(&forceContinue, "f", false, "Force continue reading /all despite of server refusal")
|
||||||
|
flag.IntVar(&fromUid, "u", 0, "From which UID start to load pages")
|
||||||
|
flag.IntVar(&limit, "c", 0, "How many pages to get")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if len(pointLogin) < 1 || len(pointPassword) < 1 {
|
if len(pointLogin) < 1 || len(pointPassword) < 1 {
|
||||||
|
@ -45,7 +49,14 @@ func main() {
|
||||||
fmt.Printf("Successfully authenticated!\n")
|
fmt.Printf("Successfully authenticated!\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
page, reqErr := pointClient.GetRecentAllPostsPage()
|
var page point.Page
|
||||||
|
var reqErr error = nil
|
||||||
|
|
||||||
|
if 0 != fromUid {
|
||||||
|
page, reqErr = pointClient.GetNextAllPostsPageBeforeUid(fromUid)
|
||||||
|
} else {
|
||||||
|
page, reqErr = pointClient.GetRecentAllPostsPage()
|
||||||
|
}
|
||||||
|
|
||||||
if reqErr != nil {
|
if reqErr != nil {
|
||||||
log.Fatal(reqErr)
|
log.Fatal(reqErr)
|
||||||
|
@ -79,6 +90,11 @@ func main() {
|
||||||
for page.HasNext {
|
for page.HasNext {
|
||||||
pageNumber++
|
pageNumber++
|
||||||
|
|
||||||
|
if pageNumber > limit {
|
||||||
|
fmt.Println("Limit reached")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
page, reqErr = pointClient.GetNextAllPostsPage(page)
|
page, reqErr = pointClient.GetNextAllPostsPage(page)
|
||||||
|
|
||||||
if reqErr != nil {
|
if reqErr != nil {
|
||||||
|
|
Loading…
Reference in a new issue