-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature: Supporting LIBRARY as data source
- Loading branch information
1 parent
ee6ca3b
commit 32a6114
Showing
7 changed files
with
198 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from avutil.avutil import * | ||
from avutil.video import * | ||
from avutil.bus import * | ||
from avutil.library import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import requests | ||
import bs4 | ||
|
||
|
||
def encode(url):
    """Shift every character of *url* forward by one code point (mod 128).

    A trivial Caesar-style de-obfuscation used to recover the stored
    base URLs at runtime.
    """
    shifted = []
    for rune in url:
        shifted.append(chr((ord(rune) + 1) % 128))
    return "".join(shifted)
|
||
|
||
class Bus:
    ''' Data source -- BUS

    Scrapes movie metadata pages from the obfuscated base URL
    (decoded at construction time via encode()).
    '''
    base_url = ""

    def __init__(self):
        # encode() is a +1 code-point shift; the literal decodes to the
        # site's search URL prefix.
        self.base_url = encode("gsso9..vvv-i`uatr-bnl.")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch metadata for *designatio* (the movie's ID code).

        Parameters:
            designatio: ID code to search for; appended to base_url.
            use_proxy: when True, route the request through http_proxy.
            http_proxy: proxy URL, used only when use_proxy is True.

        Returns:
            dict with "title" and "cover_url", plus whichever of
            designatio/date/length/director/maker/label/series/genres/cast
            headers are present on the page.
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        # Using requests
        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            response = requests.get(URL, proxies={"http": http_proxy}, headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # search title
        result["title"] = soup.select_one("body > .container > h3").string

        # cover image
        result["cover_url"] = soup.select_one(".bigImage")["href"]

        # information: which attribute headers the page actually shows
        attributes = [e.string for e in soup.select(".header")]
        include = {
            "designatio": '識別碼:' in attributes,
            "date": '發行日期:' in attributes,
            "length": '長度:' in attributes,
            "director": '導演:' in attributes,
            "maker": '製作商:' in attributes,
            "label": '發行商:' in attributes,
            "series": '系列:' in attributes,
            "genres": '類別:' in attributes,
            "cast": '演員' in attributes,
        }

        def _after_span(tag):
            # "<p><span>label</span> value</p>" -> "value".
            # BUG FIX: the original used .rstrip("</p>"), which strips any
            # trailing run of the characters <, /, p, > and could therefore
            # eat legitimate trailing 'p'/'>' characters from the value.
            return str(tag).split("</span> ")[1].split("</p>")[0]

        # Attribute extraction lambda functions (soup-wide or per-row tag)
        extract = {
            "designatio": lambda soup, i: i.select("span")[1].string,
            "date": lambda soup, i: _after_span(i),
            "length": lambda soup, i: _after_span(i),
            "director": lambda soup, i: i.a.string,
            "maker": lambda soup, i: i.a.string,
            "label": lambda soup, i: i.a.string,
            "series": lambda soup, i: i.a.string,
            "genres": lambda soup, i: [genre.string for genre in soup.select('a[href^="https://www.javbus.com/genre/"]')][2:],
            "cast": lambda soup, i: [actor.a.string for actor in soup.select('span[onmouseout^="hoverdiv"]')],
        }

        # The info rows appear in this fixed order; idx advances only for
        # attributes the page actually includes.
        info = soup.select(".info > p")
        idx = 0

        for attr in ["designatio", "date", "length", "director", "maker", "label", "series", "genres", "cast"]:
            if include[attr]:
                result[attr] = extract[attr](soup, info[idx])
                idx += 1
        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import requests | ||
import bs4 | ||
|
||
|
||
def encode(url):
    """Return *url* with each character advanced one code point (mod 128)."""
    return "".join(map(lambda c: chr((ord(c) + 1) % 128), url))
|
||
|
||
class Library:
    ''' Data source -- LIBRARY

    Scrapes movie metadata pages from the obfuscated base URL
    (decoded at construction time via encode()).
    '''
    base_url = ""

    def __init__(self):
        # encode() is a +1 code-point shift; the literal decodes to the
        # site's search-by-id URL prefix.
        self.base_url = encode(
            "gsso9..vvv-i`ukhaq`qx-bnl.bm.uk^rd`qbgaxhc-ogo>jdxvnqc<")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch metadata for *designatio* (the movie's ID code).

        Parameters:
            designatio: ID code to search for; appended to base_url.
            use_proxy: when True, route the request through http_proxy.
            http_proxy: proxy URL, used only when use_proxy is True.

        Returns:
            dict with "title" and "cover_url", plus whichever of
            designatio/date/length/director/maker/label/genres/cast/review
            fields are present on the page (missing fields are skipped).
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        # Using requests
        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            response = requests.get(
                URL, proxies={"http": http_proxy}, headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        # (removed a stray response.iter_lines() call: it built a generator
        # that was never consumed — dead code)
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # search title
        result["title"] = soup.select_one(".post-title").getText()

        # cover image (page gives a protocol-relative src)
        result["cover_url"] = "http:" + \
            soup.select_one("#video_jacket_img")["src"]

        # Attribute extraction lambda functions, keyed by result field.
        # "分钟" ("minutes") fixes the original's typo "分锺".
        extract = {
            "designatio": lambda s: s.select_one("#video_id .text").getText(),
            "date": lambda s: s.select_one("#video_date .text").getText(),
            "length": lambda s: s.select_one("#video_length .text").getText() + "分钟",
            "director": lambda s: s.select_one("#video_director .text").getText(),
            "maker": lambda s: s.select_one("#video_maker .text").getText(),
            "label": lambda s: s.select_one("#video_label .text").getText(),
            "genres": lambda s: [genre.getText().strip() for genre in s.select("#video_genres .genre")],
            "cast": lambda s: [actor.getText().strip() for actor in s.select("#video_cast .star")],
            "review": lambda s: s.select_one("#video_review .score").getText(),
        }

        info = soup.select_one("#video_info")
        for attr, func in extract.items():
            # BUG FIX: the original applied every extractor unconditionally,
            # so a page missing any field raised AttributeError
            # (select_one(...) is None). Skip absent fields instead,
            # mirroring Bus's include-guard behavior.
            try:
                result[attr] = func(info)
            except (AttributeError, TypeError):
                continue
        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.