Crawler now supports 2 new flags: -u (start from the given post UID) and -c (number of pages to get).

This commit is contained in:
Alexey Skobkin 2016-03-28 02:48:16 +03:00
parent d791b2377c
commit df54db176d
2 changed files with 36 additions and 7 deletions

View file

@ -87,20 +87,16 @@ func (c *PointClient) GetRecentAllPostsPage() (Page, error) {
return page, nil return page, nil
} }
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) { func (c *PointClient) GetNextAllPostsPageBeforeUid(uid int) (Page, error) {
var nextPage Page var nextPage Page
if false == page.HasNext || len(page.Posts) == 0 {
return nextPage, errors.New("Page must have has_next=true and some posts")
}
if len(c.token.AuthToken) == 0 { if len(c.token.AuthToken) == 0 {
log.Fatal("Can not get recent posts. Login first.") log.Fatal("Can not get recent posts. Login first.")
return nextPage, errors.New("Login first") return nextPage, errors.New("Login first")
} }
data := url.Values{} data := url.Values{}
data.Set("before", strconv.Itoa(page.Posts[len(page.Posts)-1].Uid)) data.Set("before", strconv.Itoa(uid))
headers := map[string]string{ headers := map[string]string{
"Authorization": c.token.AuthToken, "Authorization": c.token.AuthToken,
@ -127,3 +123,20 @@ func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
return nextPage, nil return nextPage, nil
} }
// GetNextAllPostsPage requests the page that continues from the given one.
//
// The Uid of the last entry in page.Posts is handed to
// GetNextAllPostsPageBeforeUid, which sends it as the "before" parameter.
// page.HasNext is not consulted here, so the caller decides whether to keep
// paging when that field is false.
//
// An error is returned when page.Posts is empty, because there is then no Uid
// to continue from. The login check is not repeated here:
// GetNextAllPostsPageBeforeUid performs it before issuing the request.
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
	if len(page.Posts) == 0 {
		return Page{}, errors.New("page must contain at least one post")
	}

	lastPost := page.Posts[len(page.Posts)-1]

	return c.GetNextAllPostsPageBeforeUid(lastPost.Uid)
}

View file

@ -14,13 +14,17 @@ func main() {
var pointApiUrl, pointToolsApiUrl string; var pointApiUrl, pointToolsApiUrl string;
var pointLogin, pointPassword, pointToolsToken string var pointLogin, pointPassword, pointToolsToken string
var forceContinue bool var forceContinue bool
var fromUid, limit int
// Todo refactor the CLI options to new library
flag.StringVar(&pointLogin, "l", "", "Account login") flag.StringVar(&pointLogin, "l", "", "Account login")
flag.StringVar(&pointPassword, "p", "", "Account password") flag.StringVar(&pointPassword, "p", "", "Account password")
flag.StringVar(&pointToolsToken, "t", "", "Point Tools crawler API token") flag.StringVar(&pointToolsToken, "t", "", "Point Tools crawler API token")
flag.StringVar(&pointApiUrl, "s", "https://point.im/api/", "Point.im API url") flag.StringVar(&pointApiUrl, "s", "https://point.im/api/", "Point.im API url")
flag.StringVar(&pointToolsApiUrl, "g", "https://point.skobk.in/api/crawler/", "Point Tools API url") flag.StringVar(&pointToolsApiUrl, "g", "https://point.skobk.in/api/crawler/", "Point Tools API url")
flag.BoolVar(&forceContinue, "f", false, "Force continue reading /all despite of server refusal") flag.BoolVar(&forceContinue, "f", false, "Force continue reading /all despite of server refusal")
flag.IntVar(&fromUid, "u", 0, "From which UID start to load pages")
flag.IntVar(&limit, "c", 0, "How many pages to get")
flag.Parse() flag.Parse()
if len(pointLogin) < 1 || len(pointPassword) < 1 { if len(pointLogin) < 1 || len(pointPassword) < 1 {
@ -45,7 +49,14 @@ func main() {
fmt.Printf("Successfully authenticated!\n") fmt.Printf("Successfully authenticated!\n")
} }
page, reqErr := pointClient.GetRecentAllPostsPage() var page point.Page
var reqErr error = nil
if 0 != fromUid {
page, reqErr = pointClient.GetNextAllPostsPageBeforeUid(fromUid)
} else {
page, reqErr = pointClient.GetRecentAllPostsPage()
}
if reqErr != nil { if reqErr != nil {
log.Fatal(reqErr) log.Fatal(reqErr)
@ -79,6 +90,11 @@ func main() {
for page.HasNext { for page.HasNext {
pageNumber++ pageNumber++
if pageNumber > limit {
fmt.Println("Limit reached")
break
}
page, reqErr = pointClient.GetNextAllPostsPage(page) page, reqErr = pointClient.GetNextAllPostsPage(page)
if reqErr != nil { if reqErr != nil {