Skip to content

Commit

Permalink
Feature(scraper): support FlareSolverr (#31)
Browse files: browse the repository at this point in the history
  • Loading branch information
xjasonlyu authored Dec 29, 2023
1 parent 2314123 commit 585c3b8
Show file tree
Hide file tree
Showing 41 changed files with 326 additions and 62 deletions.
6 changes: 4 additions & 2 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ type options struct {
dsn string

// engine options
requestTimeout time.Duration
requestTimeout time.Duration
flareSolverrURL string

// database options
dbMaxIdleConns int
Expand All @@ -56,6 +57,7 @@ func init() {
flag.StringVar(&opts.token, "token", "", "Token to access server")
flag.StringVar(&opts.dsn, "dsn", "", "Database Service Name")
flag.DurationVar(&opts.requestTimeout, "request-timeout", time.Minute, "Timeout per request")
flag.StringVar(&opts.flareSolverrURL, "flaresolverr-url", "", "FlareSolverr base url")
flag.IntVar(&opts.dbMaxIdleConns, "db-max-idle-conns", 0, "Database max idle connections")
flag.IntVar(&opts.dbMaxOpenConns, "db-max-open-conns", 0, "Database max open connections")
flag.BoolVar(&opts.dbAutoMigrate, "db-auto-migrate", false, "Database auto migration")
Expand Down Expand Up @@ -96,7 +98,7 @@ func main() {
opts.requestTimeout = defaultRequestTimeout
}

app := engine.New(db, opts.requestTimeout)
app := engine.New(db, opts.requestTimeout, opts.flareSolverrURL)
if err = app.AutoMigrate(opts.dbAutoMigrate); err != nil {
log.Fatal(err)
}
Expand Down
26 changes: 26 additions & 0 deletions common/flaresolverr/buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package flaresolverr

import (
"bytes"
"io"
)

// Compile-time proof that *readCloser implements io.ReadCloser.
var _ io.ReadCloser = (*readCloser)(nil)

// readCloser gives an in-memory *bytes.Reader a no-op Close method so it can
// be used wherever an io.ReadCloser is required.
type readCloser struct {
	*bytes.Reader
}

// newReadCloser returns a readCloser that reads from b.
func newReadCloser(b []byte) *readCloser {
	rc := new(readCloser)
	rc.Reader = bytes.NewReader(b)
	return rc
}

// newReadCloserString returns a readCloser that reads from a byte copy of s.
func newReadCloserString(s string) *readCloser {
	data := []byte(s)
	return newReadCloser(data)
}

// Close always returns nil; there is no underlying resource to release.
func (*readCloser) Close() error { return nil }
67 changes: 67 additions & 0 deletions common/flaresolverr/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package flaresolverr

import (
"fmt"
"net/http"
"net/url"
"time"

gofs "github.com/SkYNewZ/go-flaresolverr"
"github.com/google/uuid"
"golang.org/x/net/context"
)

// Client wraps a go-flaresolverr client together with the session identifier
// that is forwarded on each request it performs.
type Client struct {
// client is the underlying go-flaresolverr client that performs the requests.
client gofs.Client
// session is the identifier passed to the underlying client's Get call.
session uuid.UUID
}

// New builds a Client whose underlying go-flaresolverr client is created from
// url and timeout, and which forwards session on the requests it performs.
func New(url string, timeout time.Duration, session uuid.UUID) *Client {
	c := new(Client)
	c.client = gofs.New(url, timeout, nil)
	c.session = session
	return c
}

// Do sends req through the underlying FlareSolverr client and wraps the solved
// page in a synthetic *http.Response.
//
// Only GET requests are supported (an empty method is treated as GET, as
// net/http does for client requests); any other method yields an
// "unsupported method" error. The request's context is forwarded to the
// underlying client so that caller-side cancellation and deadlines are
// honored. On success the response always reports 200 OK, carries a sniffed
// Content-Type header, and exposes the solution body as an in-memory reader.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	// The http.RoundTripper contract requires the request body to be closed,
	// including on error paths. Do is the implementation behind RoundTrip.
	if req.Body != nil {
		defer req.Body.Close()
	}

	switch req.Method {
	case http.MethodGet, "":
		// Supported; handled below.
	default:
		return nil, fmt.Errorf("unsupported method: %s", req.Method)
	}

	// req.Context() never returns nil: it falls back to the background
	// context for requests that were built without one.
	var ctx context.Context = req.Context()

	resp, err := c.client.Get(ctx, req.URL.String(), c.session)
	if err != nil {
		return nil, err
	}
	body := resp.Solution.Response

	// http.DetectContentType inspects at most the first 512 bytes, so only
	// that prefix is converted to a byte slice.
	const sniffLen = 512
	sniff := body
	if len(sniff) > sniffLen {
		sniff = sniff[:sniffLen]
	}
	header := http.Header{}
	header.Set("Content-Type", http.DetectContentType([]byte(sniff)))

	return &http.Response{
		// Status follows the net/http convention, e.g. "200 OK".
		Status:        fmt.Sprintf("%d %s", http.StatusOK, http.StatusText(http.StatusOK)),
		StatusCode:    http.StatusOK,
		Request:       req,
		Header:        header,
		ContentLength: int64(len(body)),
		Body:          newReadCloserString(body),
	}, nil
}

// Get parses rawURL and issues a GET request for it via Do. A URL that fails
// to parse is reported as an error without any request being made.
func (c *Client) Get(rawURL string) (*http.Response, error) {
	target, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return nil, parseErr
	}

	req := &http.Request{
		Method: http.MethodGet,
		URL:    target,
	}
	return c.Do(req)
}

// StandardClient returns a new *http.Client whose Transport is a RoundTripper
// backed by c.
func (c *Client) StandardClient() *http.Client {
	transport := &RoundTripper{Client: c}
	return &http.Client{Transport: transport}
}
14 changes: 14 additions & 0 deletions common/flaresolverr/roundtripper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package flaresolverr

import (
"net/http"
)

// RoundTripper adapts a Client to the http.RoundTripper interface so that it
// can serve as the Transport of an http.Client.
type RoundTripper struct {
// Client is the client that RoundTrip hands every request to.
Client *Client
}

// RoundTrip implements http.RoundTripper by handing req to the wrapped
// Client's Do method and returning its result unchanged.
func (rt *RoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	delegate := rt.Client
	return delegate.Do(req)
}
16 changes: 13 additions & 3 deletions engine/actor.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package engine

import (
goerrors "errors"
"fmt"
"sort"
"sync"
Expand Down Expand Up @@ -85,7 +86,7 @@ func (e *Engine) searchActor(keyword string, provider mt.Provider, fallback bool
innerResults, innerErr := innerSearch(name)
if innerErr != nil &&
// ignore InfoNotFound error.
innerErr != mt.ErrInfoNotFound {
!goerrors.Is(innerErr, mt.ErrInfoNotFound) {
// add error to chain and handle it later.
errors = append(errors, innerErr)
continue
Expand All @@ -109,12 +110,21 @@ func (e *Engine) SearchActor(keyword, name string, fallback bool) ([]*model.Acto
return e.searchActor(keyword, provider, fallback)
}

func (e *Engine) SearchActorAll(keyword string, fallback bool) (results []*model.ActorSearchResult, err error) {
func (e *Engine) SearchActorAll(keyword, lang string, fallback bool) (results []*model.ActorSearchResult, err error) {
var (
mu sync.Mutex
wg sync.WaitGroup
)
for _, provider := range e.actorProviders {

availableProviders := e.actorProviders
if lang != "" {
if availableProviders, err = e.GetActorProvidersByLanguage(lang); err != nil {
return
}
e.logger.Infof("Actor Keyword: %s, Language: %s, Providers: %v", keyword, lang, availableProviders)
}

for _, provider := range availableProviders {
wg.Add(1)
go func(provider mt.ActorProvider) {
defer wg.Done()
Expand Down
53 changes: 47 additions & 6 deletions engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"go.uber.org/zap"
"golang.org/x/text/language"
"gorm.io/gorm"

"github.com/metatube-community/metatube-sdk-go/common/fetch"
Expand All @@ -28,15 +29,15 @@ type Engine struct {
movieHostProviders map[string][]mt.MovieProvider
}

func New(db *gorm.DB, timeout time.Duration) *Engine {
func New(db *gorm.DB, timeout time.Duration, flareSolverrURL string) *Engine {
engine := &Engine{
db: db,
fetcher: fetch.Default(nil),
}
logger, _ := zap.NewProduction()
engine.logger = logger.Sugar()
engine.initActorProviders(timeout)
engine.initMovieProviders(timeout)
engine.initActorProviders(timeout, flareSolverrURL)
engine.initMovieProviders(timeout, flareSolverrURL)
return engine
}

Expand All @@ -45,13 +46,13 @@ func Default() *Engine {
DSN: "",
DisableAutomaticPing: true,
})
engine := New(db, time.Minute)
engine := New(db, time.Minute, "")
defer engine.AutoMigrate(true)
return engine
}

// initActorProviders initializes actor providers.
func (e *Engine) initActorProviders(timeout time.Duration) {
func (e *Engine) initActorProviders(timeout time.Duration, flareSolverrURL string) {
{ // init
e.actorProviders = make(map[string]mt.ActorProvider)
e.actorHostProviders = make(map[string][]mt.ActorProvider)
Expand All @@ -61,6 +62,9 @@ func (e *Engine) initActorProviders(timeout time.Duration) {
if s, ok := provider.(mt.RequestTimeoutSetter); ok {
s.SetRequestTimeout(timeout)
}
if s, ok := provider.(mt.FlareSolverrSetter); ok {
s.SetFlareSolverr(flareSolverrURL)
}
// Add actor provider by name.
e.actorProviders[strings.ToUpper(name)] = provider
// Add actor provider by host.
Expand All @@ -70,7 +74,7 @@ func (e *Engine) initActorProviders(timeout time.Duration) {
}

// initMovieProviders initializes movie providers.
func (e *Engine) initMovieProviders(timeout time.Duration) {
func (e *Engine) initMovieProviders(timeout time.Duration, flareSolverrURL string) {
{ // init
e.movieProviders = make(map[string]mt.MovieProvider)
e.movieHostProviders = make(map[string][]mt.MovieProvider)
Expand All @@ -80,6 +84,9 @@ func (e *Engine) initMovieProviders(timeout time.Duration) {
if s, ok := provider.(mt.RequestTimeoutSetter); ok {
s.SetRequestTimeout(timeout)
}
if s, ok := provider.(mt.FlareSolverrSetter); ok {
s.SetFlareSolverr(flareSolverrURL)
}
// Add movie provider by name.
e.movieProviders[strings.ToUpper(name)] = provider
// Add movie provider by host.
Expand All @@ -97,6 +104,23 @@ func (e *Engine) GetActorProviders() map[string]mt.ActorProvider {
return e.actorProviders
}

// GetActorProvidersByLanguage returns the registered actor providers whose
// language matches lang with at least language.Low confidence. The result is
// keyed by the upper-cased provider name. An error is returned when lang
// cannot be parsed as a language tag.
func (e *Engine) GetActorProvidersByLanguage(lang string) (map[string]mt.ActorProvider, error) {
	want, parseErr := language.Parse(lang)
	if parseErr != nil {
		return nil, parseErr
	}

	// The matcher supports only the requested tag; each provider's language
	// is then scored against it.
	m := language.NewMatcher([]language.Tag{want})
	matched := make(map[string]mt.ActorProvider)

	for _, p := range e.actorProviders {
		_, _, confidence := m.Match(p.Language())
		if confidence < language.Low {
			continue
		}
		matched[strings.ToUpper(p.Name())] = p
	}
	return matched, nil
}

func (e *Engine) GetActorProviderByURL(rawURL string) (mt.ActorProvider, error) {
u, err := url.Parse(rawURL)
if err != nil {
Expand Down Expand Up @@ -135,6 +159,23 @@ func (e *Engine) GetMovieProviders() map[string]mt.MovieProvider {
return e.movieProviders
}

// GetMovieProvidersByLanguage returns the registered movie providers whose
// language matches lang with at least language.Low confidence. The result is
// keyed by the upper-cased provider name. An error is returned when lang
// cannot be parsed as a language tag.
func (e *Engine) GetMovieProvidersByLanguage(lang string) (map[string]mt.MovieProvider, error) {
	want, parseErr := language.Parse(lang)
	if parseErr != nil {
		return nil, parseErr
	}

	// The matcher supports only the requested tag; each provider's language
	// is then scored against it.
	m := language.NewMatcher([]language.Tag{want})
	matched := make(map[string]mt.MovieProvider)

	for _, p := range e.movieProviders {
		_, _, confidence := m.Match(p.Language())
		if confidence < language.Low {
			continue
		}
		matched[strings.ToUpper(p.Name())] = p
	}
	return matched, nil
}

func (e *Engine) GetMovieProviderByURL(rawURL string) (mt.MovieProvider, error) {
u, err := url.Parse(rawURL)
if err != nil {
Expand Down
22 changes: 17 additions & 5 deletions engine/movie.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (e *Engine) SearchMovie(keyword, name string, fallback bool) ([]*model.Movi
return e.searchMovie(keyword, provider, fallback)
}

func (e *Engine) searchMovieAll(keyword string) (results []*model.MovieSearchResult, err error) {
func (e *Engine) searchMovieAll(keyword string, providers map[string]mt.MovieProvider) (results []*model.MovieSearchResult, err error) {
type response struct {
Results []*model.MovieSearchResult
Error error
Expand All @@ -96,7 +96,7 @@ func (e *Engine) searchMovieAll(keyword string) (results []*model.MovieSearchRes
respCh := make(chan response)

var wg sync.WaitGroup
for _, provider := range e.movieProviders {
for _, provider := range providers {
wg.Add(1)
// Goroutine started time.
startTime := time.Now()
Expand Down Expand Up @@ -139,11 +139,19 @@ func (e *Engine) searchMovieAll(keyword string) (results []*model.MovieSearchRes
}

// SearchMovieAll searches the keyword from all providers.
func (e *Engine) SearchMovieAll(keyword string, fallback bool) (results []*model.MovieSearchResult, err error) {
func (e *Engine) SearchMovieAll(keyword, lang string, fallback bool) (results []*model.MovieSearchResult, err error) {
if keyword = number.Trim(keyword); keyword == "" {
return nil, mt.ErrInvalidKeyword
}

availableProviders := e.movieProviders
if lang != "" {
if availableProviders, err = e.GetMovieProvidersByLanguage(lang); err != nil {
return
}
e.logger.Infof("Movie Keyword: %s, Language: %s, Providers: %v", keyword, lang, availableProviders)
}

defer func() {
if err != nil {
return
Expand Down Expand Up @@ -180,12 +188,16 @@ func (e *Engine) SearchMovieAll(keyword string, fallback bool) (results []*model
// overwrite error.
err = nil
// append results.
results = append(results, innerResults...)
for _, result := range innerResults {
if _, ok := availableProviders[strings.ToUpper(result.Provider)]; ok {
results = append(results, result)
}
}
}
}()
}

results, err = e.searchMovieAll(keyword)
results, err = e.searchMovieAll(keyword, availableProviders)
return
}

Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/metatube-community/metatube-sdk-go
go 1.21

require (
github.com/SkYNewZ/go-flaresolverr v0.0.0-20230503120808-3826fdd382dd
github.com/adrg/strutil v0.3.1
github.com/antchfx/htmlquery v1.3.0
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
Expand All @@ -11,6 +12,7 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/glebarez/sqlite v1.10.0
github.com/gocolly/colly/v2 v2.1.1-0.20230620150846-a6e3d81fe6b7
github.com/google/uuid v1.5.0
github.com/grafov/m3u8 v0.12.0
github.com/hashicorp/go-retryablehttp v0.7.5
github.com/iancoleman/orderedmap v0.3.0
Expand Down Expand Up @@ -54,7 +56,6 @@ require (
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/uuid v1.5.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 // indirect
Expand Down
Loading

0 comments on commit 585c3b8

Please sign in to comment.