-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature: Supporting LIBRARY as data source
- Loading branch information
1 parent
ee6ca3b
commit 32a6114
Showing
7 changed files
with
198 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from avutil.avutil import * | ||
from avutil.video import * | ||
from avutil.bus import * | ||
from avutil.library import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import requests | ||
import bs4 | ||
|
||
|
||
def encode(url):
    """Shift every character of *url* forward by one code point (mod 128).

    A trivial Caesar-style de-obfuscation used to recover the stored
    base URLs at runtime.
    """
    shifted = []
    for rune in url:
        shifted.append(chr((ord(rune) + 1) % 128))
    return "".join(shifted)
|
||
|
||
class Bus:
    ''' Data source -- BUS

    Scrapes movie metadata pages from the obfuscated base URL
    (decoded at construction time via encode()).
    '''
    base_url = ""

    def __init__(self):
        # encode() is a +1 code-point shift; the literal decodes to the
        # site's search URL prefix.
        self.base_url = encode("gsso9..vvv-i`uatr-bnl.")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch metadata for *designatio* (the movie's ID code).

        Parameters:
            designatio: ID code to search for; appended to base_url.
            use_proxy: when True, route the request through http_proxy.
            http_proxy: proxy URL, used only when use_proxy is True.

        Returns:
            dict with "title" and "cover_url", plus whichever of
            designatio/date/length/director/maker/label/series/genres/cast
            headers are present on the page.
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        # Using requests
        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            response = requests.get(URL, proxies={"http": http_proxy}, headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # search title
        result["title"] = soup.select_one("body > .container > h3").string

        # cover image
        result["cover_url"] = soup.select_one(".bigImage")["href"]

        # information: which attribute headers the page actually shows
        attributes = [e.string for e in soup.select(".header")]
        include = {
            "designatio": '識別碼:' in attributes,
            "date": '發行日期:' in attributes,
            "length": '長度:' in attributes,
            "director": '導演:' in attributes,
            "maker": '製作商:' in attributes,
            "label": '發行商:' in attributes,
            "series": '系列:' in attributes,
            "genres": '類別:' in attributes,
            "cast": '演員' in attributes,
        }

        def _after_span(tag):
            # "<p><span>label</span> value</p>" -> "value".
            # BUG FIX: the original used .rstrip("</p>"), which strips any
            # trailing run of the characters <, /, p, > and could therefore
            # eat legitimate trailing 'p'/'>' characters from the value.
            return str(tag).split("</span> ")[1].split("</p>")[0]

        # Attribute extraction lambda functions (soup-wide or per-row tag)
        extract = {
            "designatio": lambda soup, i: i.select("span")[1].string,
            "date": lambda soup, i: _after_span(i),
            "length": lambda soup, i: _after_span(i),
            "director": lambda soup, i: i.a.string,
            "maker": lambda soup, i: i.a.string,
            "label": lambda soup, i: i.a.string,
            "series": lambda soup, i: i.a.string,
            "genres": lambda soup, i: [genre.string for genre in soup.select('a[href^="https://www.javbus.com/genre/"]')][2:],
            "cast": lambda soup, i: [actor.a.string for actor in soup.select('span[onmouseout^="hoverdiv"]')],
        }

        # The info rows appear in this fixed order; idx advances only for
        # attributes the page actually includes.
        info = soup.select(".info > p")
        idx = 0

        for attr in ["designatio", "date", "length", "director", "maker", "label", "series", "genres", "cast"]:
            if include[attr]:
                result[attr] = extract[attr](soup, info[idx])
                idx += 1
        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import requests | ||
import bs4 | ||
|
||
|
||
def encode(url):
    """Return *url* with each character advanced one code point (mod 128)."""
    return "".join(map(lambda c: chr((ord(c) + 1) % 128), url))
|
||
|
||
class Library:
    ''' Data source -- LIBRARY

    Scrapes movie metadata pages from the obfuscated base URL
    (decoded at construction time via encode()).
    '''
    base_url = ""

    def __init__(self):
        # encode() is a +1 code-point shift; the literal decodes to the
        # site's search-by-id URL prefix.
        self.base_url = encode(
            "gsso9..vvv-i`ukhaq`qx-bnl.bm.uk^rd`qbgaxhc-ogo>jdxvnqc<")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch metadata for *designatio* (the movie's ID code).

        Parameters:
            designatio: ID code to search for; appended to base_url.
            use_proxy: when True, route the request through http_proxy.
            http_proxy: proxy URL, used only when use_proxy is True.

        Returns:
            dict with "title" and "cover_url", plus whichever of
            designatio/date/length/director/maker/label/genres/cast/review
            fields are present on the page (missing fields are skipped).
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        # Using requests
        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            response = requests.get(
                URL, proxies={"http": http_proxy}, headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        # (removed a stray response.iter_lines() call: it built a generator
        # that was never consumed — dead code)
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # search title
        result["title"] = soup.select_one(".post-title").getText()

        # cover image (page gives a protocol-relative src)
        result["cover_url"] = "http:" + \
            soup.select_one("#video_jacket_img")["src"]

        # Attribute extraction lambda functions, keyed by result field.
        # "分钟" ("minutes") fixes the original's typo "分锺".
        extract = {
            "designatio": lambda s: s.select_one("#video_id .text").getText(),
            "date": lambda s: s.select_one("#video_date .text").getText(),
            "length": lambda s: s.select_one("#video_length .text").getText() + "分钟",
            "director": lambda s: s.select_one("#video_director .text").getText(),
            "maker": lambda s: s.select_one("#video_maker .text").getText(),
            "label": lambda s: s.select_one("#video_label .text").getText(),
            "genres": lambda s: [genre.getText().strip() for genre in s.select("#video_genres .genre")],
            "cast": lambda s: [actor.getText().strip() for actor in s.select("#video_cast .star")],
            "review": lambda s: s.select_one("#video_review .score").getText(),
        }

        info = soup.select_one("#video_info")
        for attr, func in extract.items():
            # BUG FIX: the original applied every extractor unconditionally,
            # so a page missing any field raised AttributeError
            # (select_one(...) is None). Skip absent fields instead,
            # mirroring Bus's include-guard behavior.
            try:
                result[attr] = func(info)
            except (AttributeError, TypeError):
                continue
        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.