Skip to content

Commit

Permalink
Feature: Supporting LIBRARY as data source
Browse files Browse the repository at this point in the history
  • Loading branch information
Lqlsoftware committed Oct 24, 2020
1 parent ee6ca3b commit 32a6114
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 91 deletions.
32 changes: 21 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

[![Release](https://img.shields.io/pypi/v/avutil?color=%2366CCFF&label=release)](https://pypi.org/project/avutil/)

Provide some useful utils for *tidying up* your personal video folder
Provide some useful utils for *tidying up* your personal video folder.
Data source from *LIBRARY* or *BUS*.

- Extract designatio
- Search folder (recursively)
Expand All @@ -27,13 +28,13 @@ pip install avutil
## Usage

```sh
# tidyup -h
$ tidyup -h
```

Tidy up current dir

```sh
# tidyup
$ tidyup
```

## Usage in Python script
Expand All @@ -54,30 +55,39 @@ Or you can specify the extension type of video
videos = Search_folder(folder, media_suffix={"mp4", "wmv", "avi", "mkv"})
```

Pull video info & download cover image
Pull video info from *LIBRARY* by default & download cover image
```python
for video in videos:
# Pull video info
video.pull_info()
print(video)

# Download cover image (as video.title + .jpg)
video.download_cover()
```

(Or proxy supported!)
It's also okay to use *BUS* as the data source
```python
for video in videos:
# Pull video info using proxy
video.pull_info(use_proxy=True, http_proxy="http://127.0.0.1:1087")
print(video)
# Download cover image using proxy (as video.title + .jpg)
video.download_cover(use_proxy=True, http_proxy="http://127.0.0.1:1087")
# Pull video info
video.pull_info(source=avutil.Bus())
```

Tidy up!

```python
# Tidy up (rename to video.designatio + video.actors)
video.rename()
```

## Proxy

Proxy is supported in avutil in two ways: you can define a global shell environment variable:
```shell
$ export ALL_PROXY="127.0.0.1:1087"
```

Or you can pass http-proxy in code
```python
video.pull_info(use_proxy=True, http_proxy="http://127.0.0.1:1087")
video.download_cover(use_proxy=True, http_proxy="http://127.0.0.1:1087")
```
4 changes: 3 additions & 1 deletion avutil/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from avutil.avutil import *
from avutil.video import *
from avutil.bus import *
from avutil.library import *
77 changes: 77 additions & 0 deletions avutil/bus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import requests
import bs4


def encode(url):
    """Decode an obfuscated URL by shifting every character code up by one, wrapping at 128."""
    decoded = []
    for ch in url:
        decoded.append(chr((ord(ch) + 1) % 128))
    return "".join(decoded)


class Bus:
    """Data source -- BUS.

    Scrapes the detail page for a given designatio and returns the parsed
    metadata (title, cover URL and the labelled attribute rows) as a dict.
    """

    # Filled in by __init__; kept obfuscated at rest in the source.
    base_url = ""

    def __init__(self):
        # Decodes to the site's base URL (see module-level `encode`).
        self.base_url = encode("gsso9..vvv-i`uatr-bnl.")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch and parse metadata for *designatio*.

        :param designatio: the video's identification code, appended to base_url
        :param use_proxy: route the request through *http_proxy* when True
        :param http_proxy: proxy URL, e.g. "http://127.0.0.1:1087"
        :return: dict with "title", "cover_url" and any of the attribute
                 keys ("designatio", "date", "length", ...) present on the page
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            # Bug fix: base_url is served over HTTPS, so the proxy must be
            # registered for both schemes -- a bare {"http": ...} mapping was
            # silently ignored for https:// requests.
            response = requests.get(
                URL,
                proxies={"http": http_proxy, "https": http_proxy},
                headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # page title
        result["title"] = soup.select_one("body > .container > h3").string

        # cover image
        result["cover_url"] = soup.select_one(".bigImage")["href"]

        # information: determine which labelled rows this page actually has,
        # since optional rows (director, series, ...) may be absent.
        attributes = [e.string for e in soup.select(".header")]
        include = {
            "designatio": '識別碼:' in attributes,
            "date": '發行日期:' in attributes,
            "length": '長度:' in attributes,
            "director": '導演:' in attributes,
            "maker": '製作商:' in attributes,
            "label": '發行商:' in attributes,
            "series": '系列:' in attributes,
            "genres": '類別:' in attributes,
            "cast": '演員' in attributes,
        }

        # Per-attribute extraction lambdas; `i` is the matching ".info > p"
        # row, `soup` the whole page (needed for genres/cast, which live
        # outside the row).
        extract = {
            "designatio": lambda soup, i: i.select("span")[1].string,
            "date": lambda soup, i: str(i).split("</span> ")[1].rstrip("</p>"),
            "length": lambda soup, i: str(i).split("</span> ")[1].rstrip("</p>"),
            "director": lambda soup, i: i.a.string,
            "maker": lambda soup, i: i.a.string,
            "label": lambda soup, i: i.a.string,
            "series": lambda soup, i: i.a.string,
            "genres": lambda soup, i: [genre.string for genre in soup.select('a[href^="https://www.javbus.com/genre/"]')][2:],
            "cast": lambda soup, i: [actor.a.string for actor in soup.select('span[onmouseout^="hoverdiv"]')],
        }

        # The info rows appear in page order; consume one row per attribute
        # the page includes, keeping row index and attribute list in sync.
        info = soup.select(".info > p")
        idx = 0

        for attr in ["designatio", "date", "length", "director", "maker", "label", "series", "genres", "cast"]:
            if include[attr]:
                result[attr] = extract[attr](soup, info[idx])
                idx += 1
        return result
63 changes: 63 additions & 0 deletions avutil/library.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import requests
import bs4


def encode(url):
    """Shift each character of *url* up by one code point (mod 128) to de-obfuscate it."""
    return "".join(chr((ord(c) + 1) % 128) for c in url)


class Library:
    """Data source -- LIBRARY.

    Looks a designatio up via the site's search endpoint and parses the
    resulting detail page into a metadata dict.
    """

    # Filled in by __init__; kept obfuscated at rest in the source.
    base_url = ""

    def __init__(self):
        # Decodes to the site's search URL prefix (see module-level `encode`).
        self.base_url = encode(
            "gsso9..vvv-i`ukhaq`qx-bnl.bm.uk^rd`qbgaxhc-ogo>jdxvnqc<")

    def Get(self, designatio, use_proxy=False, http_proxy=None):
        """Fetch and parse metadata for *designatio*.

        :param designatio: the video's identification code, appended to base_url
        :param use_proxy: route the request through *http_proxy* when True
        :param http_proxy: proxy URL, e.g. "http://127.0.0.1:1087"
        :return: dict with "title", "cover_url" and the attribute keys
                 ("designatio", "date", "length", ...)
        """
        result = {}

        # URL for searching designatio
        URL = self.base_url + designatio

        headers = {
            'Cache-Control': 'no-cache',
            'Accept': 'text/event-stream',
            'Accept-Encoding': 'gzip'
        }
        if use_proxy:
            # Bug fix: the site is served over HTTPS, so the proxy must be
            # registered for both schemes -- a bare {"http": ...} mapping was
            # silently ignored for https:// requests.
            response = requests.get(
                URL,
                proxies={"http": http_proxy, "https": http_proxy},
                headers=headers)
        else:
            response = requests.get(URL, headers=headers)

        # parse html
        # (Removed a stray `response.iter_lines()` call here: it only built
        # an unused generator and had no effect.)
        soup = bs4.BeautifulSoup(response.content, features="html.parser")

        # page title
        result["title"] = soup.select_one(".post-title").getText()

        # cover image; the page uses a scheme-relative "//..." src
        result["cover_url"] = "http:" + \
            soup.select_one("#video_jacket_img")["src"]

        # Per-attribute extraction lambdas, applied to the "#video_info" node.
        extract = {
            "designatio": lambda s: s.select_one("#video_id .text").getText(),
            "date": lambda s: s.select_one("#video_date .text").getText(),
            "length": lambda s: s.select_one("#video_length .text").getText() + "分锺",
            "director": lambda s: s.select_one("#video_director .text").getText(),
            "maker": lambda s: s.select_one("#video_maker .text").getText(),
            "label": lambda s: s.select_one("#video_label .text").getText(),
            "genres": lambda s: [genre.getText().strip() for genre in s.select("#video_genres .genre")],
            "cast": lambda s: [actor.getText().strip() for actor in s.select("#video_cast .star")],
            "review": lambda s: s.select_one("#video_review .score").getText(),
        }

        info = soup.select_one("#video_info")
        # NOTE(review): unlike Bus.Get, every field is extracted
        # unconditionally; a page missing one of these nodes would raise
        # AttributeError -- confirm whether missing fields should be skipped.
        for attr, func in extract.items():
            result[attr] = func(info)
        return result
3 changes: 2 additions & 1 deletion avutil/tidy_up.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
import sys
import argparse
import avutil
import pickle
import avutil

sys.setrecursionlimit(10000)


Expand Down
Loading

0 comments on commit 32a6114

Please sign in to comment.