diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 68f8a54..11484be 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -1,22 +1,45 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -30,7 +53,36 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/config/config.go b/config/config.go
index 10c38bc..97cbb7b 100755
--- a/config/config.go
+++ b/config/config.go
@@ -8,7 +8,10 @@ var (
ContentOptions string
// 显示版本号
Version bool
+ // cookie 内容或者cookie文件地址
+ Cookie string
)
+
const VideoPPT="https://www.icourses.cn/web/sword/portal/shareChapter?cid="
const Assignments="http://www.icourses.cn/web/sword/portal/assignments?cid="
const TestPaper="http://www.icourses.cn/web/sword/portal/testPaper?cid="
diff --git a/download/download.go b/download/download.go
index 7ba2681..a68da31 100755
--- a/download/download.go
+++ b/download/download.go
@@ -16,13 +16,71 @@ import (
//批量下载file数组里的文件
-func DownloadFiles(files []utils.File){
+func DownloadFiles(files []utils.File,courseName string){
for _,file:=range(files){
//下载到指定文件夹
- fmt.Println("\n"+filepath.Join(config.OutputPath,file.FilePATH))
- DownloadFile(file.FileURL,filepath.Join(config.OutputPath,file.FilePATH))
+ fmt.Println("\n"+filepath.Join(config.OutputPath,courseName,file.FilePATH))
+ DownloadFile(file.FileURL,filepath.Join(config.OutputPath,courseName,file.FilePATH))
}
}
+
+// DownloadCookieFiles saves every entry of files below <config.OutputPath>/<courseName>, passing cookie on to DownloadCookieFile.
+func DownloadCookieFiles(files []utils.File, courseName string, cookie string) {
+	for idx := range files {
+		target := filepath.Join(config.OutputPath, courseName, files[idx].FilePATH)
+		fmt.Println("\n" + target)
+		DownloadCookieFile(files[idx].FileURL, target, cookie)
+	}
+}
+
+// DownloadCookieFile downloads url to filePath, sending cookie in the Cookie request header.
+// An existing file is skipped; a failed request or a non-200 status ends the program via log.Fatal.
+func DownloadCookieFile(url string, filePath string, cookie string) {
+	// Check first, so that re-runs do not contact the server for files that are already there.
+	if utils.FileExists(filePath) {
+		fmt.Printf("file already exists, skipping\n")
+		return
+	}
+	// The size reported by getFileSize is only used to scale the progress bar.
+	dataSize := getFileSize(url)
+	client := &http.Client{}
+	reqest, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	reqest.Header.Set("Cookie", cookie)
+	res, err := client.Do(reqest)
+	if err != nil {
+		// res is nil when the request fails, so it must not be touched before this check.
+		log.Fatal(err)
+	}
+	defer res.Body.Close()
+	if res.StatusCode != 200 {
+		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
+	}
+	// Create the destination directory when it is missing.
+	if _, err := os.Stat(filepath.Dir(filePath)); os.IsNotExist(err) {
+		_ = os.MkdirAll(filepath.Dir(filePath), os.ModePerm) // a failure shows up in os.Create below
+	}
+	// Progress bar sized to the expected number of bytes.
+	bar := progressbar.NewOptions(
+		int(dataSize),
+		progressbar.OptionSetBytes(int(dataSize)),
+		progressbar.OptionShowCount(),
+		progressbar.OptionShowIts(),
+	)
+	dest, err := os.Create(filePath)
+	if err != nil {
+		fmt.Printf("Can't create %s: %v\n", filePath, err)
+		return
+	}
+	defer dest.Close()
+	// Stream the body into the file and the progress bar at the same time.
+	if _, err := io.Copy(io.MultiWriter(dest, bar), res.Body); err != nil {
+		fmt.Printf("Can't save %s: %v\n", filePath, err)
+	}
+}
+
//根据网络URL获得文件的大小
func getFileSize(url string) int64 {
res, err := http.Head(url)
@@ -77,3 +135,4 @@ func DownloadFile(url string,filePath string){
out := io.MultiWriter(dest, bar)
_, _ = io.Copy(out, res.Body)
}
+
diff --git a/icourse b/icourse
index 55236bf..a732c31 100755
Binary files a/icourse and b/icourse differ
diff --git a/icourse-darwin b/icourse-darwin
index 7d036cf..a4f5a0d 100755
Binary files a/icourse-darwin and b/icourse-darwin differ
diff --git a/icourse.exe b/icourse.exe
index cbe3b44..8c11b32 100755
Binary files a/icourse.exe and b/icourse.exe differ
diff --git a/main.go b/main.go
index 36a934c..ea9d5e6 100755
--- a/main.go
+++ b/main.go
@@ -16,40 +16,30 @@ import (
func init() {
flag.BoolVar(&config.Version, "v", false, "Show version")
//all为全部下载,most为视频课件以及试卷,也为下载默认选项,videoPPT仅下载视频和课件,exams为仅下载试卷,resources仅下载其它资源
- flag.StringVar(&config.ContentOptions, "c", "most", "Specify the download content {all,most,videoPPT,assignments,testPaper,shareResource}")
+ flag.StringVar(&config.ContentOptions, "co", "all", "Only for icourse : Specify the download content {all,most,videoPPT,assignments,testPaper,shareResource}\nOnly for chinesemooc : Specify the download content {all, video , PPT}")
+ //华文慕课的下载选项,只有三个:全部下载,只下载视频 以及 只下载课件
+
+ //设置下载路径
flag.StringVar(&config.OutputPath, "o", "", "Specify the output path")
- //flag.StringVar(&config.StartUrl, "F", "", "course URL")
+ //设置cookie
+ flag.StringVar(&config.Cookie, "c", "", "Cookie or the path of Cookie file")
}
-func download(url string,options string) bool{
- id:=utils.MatchAll(url,`course_([0-9]*)`)
- if id != nil{
- //得到课程的id地址
- idNum:=id[0][1]
- //fmt.Println(idNum)
- switch options{
- case "all":
- parser.DownloadAll(idNum)
- case "most":
- parser.DownloadMost(idNum)
- case "videoPPT":
- parser.DownloadVideoPPT(idNum)
- case "assignments":
- parser.DownloadAssignments(idNum)
- case "testPaper":
- parser.DownloadTestPaper(idNum)
- case "shareResource":
- parser.DownloadShareResource(idNum)
- }
- } else{
- //网址不符合格式
- fmt.Printf("this website %s is not supported now",url)
- return true
+// download hands url to the parser for its site. NOTE(review): it returns true on every path and main's loop treats true as an error -- confirm the intended meaning.
+func download(url string) bool {
+	switch utils.Domain(url) {
+	case "icourses":
+		parser.DownloadIcourse(url, config.ContentOptions)
+	case "chinesemooc":
+		parser.DownloadChinesemooc(url, config.ContentOptions, config.Cookie)
+	default: // an unrecognised site used to be skipped without any feedback
+		fmt.Printf("this website %s is not supported now\n", url)
+	}
- return true
+	return true
}
func main() {
//此处参考了annie的代码
+	//fmt.Println(parser.GetStartURLs("http://www.chinesemooc.org/mooc/4880","<cookie copied from a logged-in browser session>"))
flag.Parse()
args := flag.Args()
@@ -63,10 +53,14 @@ func main() {
flag.PrintDefaults()
return
}
+ if config.Cookie != ""{
+ utils.ReadCookieFromFile(config.Cookie)
+ //fmt.Println(config.Cookie)
+ }
var isErr bool
//可以下载多个url
for _, videoURL := range args {
- if err := download(strings.TrimSpace(videoURL),config.ContentOptions); err {
+ if err := download(strings.TrimSpace(videoURL)); err {
isErr = true
}
}
diff --git a/parser/chinesemooc.go b/parser/chinesemooc.go
new file mode 100644
index 0000000..8bd3f0f
--- /dev/null
+++ b/parser/chinesemooc.go
@@ -0,0 +1,176 @@
+package parser
+
+import (
+ "fmt"
+ "github.com/PuerkitoBio/goquery"
+ "icourse/download"
+ "icourse/utils"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// DownloadChinesemooc downloads the chinesemooc.org course at url; options picks the material ("all", "video" or "PPT") and cookie is passed to the helpers.
+func DownloadChinesemooc(url string, options string, cookie string) {
+	courseName := GetChinesemoocName(url)
+	fmt.Println(courseName)
+	switch options {
+	case "all":
+		DownloadCMAll(url, courseName, cookie)
+	case "video":
+		DownloadCMVideos(url, courseName, cookie)
+	case "PPT":
+		DownloadCMPPTs(url, courseName, cookie)
+	default: // e.g. an icourse-only value such as "most"; report it instead of silently doing nothing
+		fmt.Printf("unsupported download option %q for chinesemooc, use all, video or PPT\n", options)
+	}
+}
+// DownloadCMAll downloads the course videos first and the courseware second.
+func DownloadCMAll(url string, courseName, cookie string) {
+	for _, fetch := range []func(string, string, string){DownloadCMVideos, DownloadCMPPTs} {
+		fetch(url, courseName, cookie)
+	}
+}
+// DownloadCMVideos downloads all videos of the course at url; files are stored under courseName.
+// cookie is passed to every helper called here.
+func DownloadCMVideos(url string, courseName, cookie string) {
+	// GetStartURLs yields (video progress page, courseware page); only the first is used here.
+	progressPage, _ := GetStartURLs(url, cookie)
+	videoFiles := ExtractCMVideo(progressPage, cookie)
+	// The collected links can be fetched directly.
+	download.DownloadCookieFiles(videoFiles, courseName, cookie)
+}
+// DownloadCMPPTs downloads all courseware files of the course at url; files are stored under courseName.
+func DownloadCMPPTs(url string, courseName, cookie string) {
+	// GetStartURLs yields (video progress page, courseware page); only the second is used here.
+	_, cwarePage := GetStartURLs(url, cookie)
+	cwareFiles := ExtractCMPPTs(cwarePage, cookie)
+	// The files are fetched with the cookie attached.
+	download.DownloadCookieFiles(cwareFiles, courseName, cookie)
+}
+
+
+// GetChinesemoocName fetches url and returns the text of the page's <title>, cleaned with utils.Format.
+func GetChinesemoocName(url string) string {
+	page := strings.NewReader(utils.HttpGet(url))
+	doc, err := goquery.NewDocumentFromReader(page)
+	utils.Check(err)
+	title := doc.Find("head > title").Text()
+	return utils.Format(title)
+}
+
+// getVideoURL requests url with cookies and returns the first .mp4 link found in the response.
+// A link ending in "SD.mp4" is preferred, any other .mp4 link is the fallback, and "" means none was found.
+func getVideoURL(url string, cookies string) string {
+	data := utils.HttpGetCookie(url, cookies)
+	// The {100,200} bound limits how long a matched link may be. Picking the best available
+	// quality automatically is still open; the SD variant is taken when several are offered.
+	links := utils.MatchAll(data, `http.{100,200}SD.mp4`)
+	if len(links) == 0 {
+		// Only a single quality is offered.
+		links = utils.MatchAll(data, "http.{100,200}.mp4")
+	}
+	if len(links) == 0 {
+		// Indexing the empty result used to panic with "index out of range".
+		fmt.Printf("no video link found at %s\n", url)
+		return ""
+	}
+	// Remove every backslash from the link (presumably escaping added by the API response).
+	re := regexp.MustCompile(`\\`)
+	return re.ReplaceAllString(links[0][0], "")
+}
+
+// ExtractCMVideo loads the course progress page at url (sending cookies) and returns one utils.File per video link.
+// Paths have the form <chapter>/<section>/<n><title>.mp4, n being the 1-based position of the video in its section.
+// Links whose href lacks the expected ids are skipped.
+func ExtractCMVideo(url string, cookies string) []utils.File {
+	// Files collected for download.
+	var files []utils.File
+	data := utils.HttpGetCookie(url, cookies)
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
+	utils.Check(err)
+	doc.Find("#coursefile > div.file-lists > div").Each(func(_ int, chapter *goquery.Selection) {
+		chapterName := chapter.Find(".main-item >span").Text()
+
+		chapter.Find("div.item-detail > ul > li.light.clearfix").Each(func(_ int, section *goquery.Selection) {
+			// Title of the section.
+			contentName := section.Find(".course-name >span").Text()
+			// A section may hold several videos.
+			section.Find(".icon-spow-wrap > .video").Each(func(j int, link *goquery.Selection) {
+				filename, _ := link.Attr("original-title")
+				href, _ := link.Attr("href")
+				filePath := filepath.Join(chapterName, contentName, strconv.Itoa(j+1)+filename+".mp4")
+				// Both ids from the href are needed for the watch-API request.
+				idMatch := utils.MatchAll(href, `&id=([0-9]*)`)
+				eidMatch := utils.MatchAll(href, `eid=([0-9]*)`)
+				if len(idMatch) == 0 || len(eidMatch) == 0 {
+					// Indexing a missing match used to panic; skip this link instead.
+					fmt.Printf("skipping %s: no video ids in %q\n", filePath, href)
+					return
+				}
+				watchURL := "http://www.chinesemooc.org/api/course_video_watch.php?course_id=" + idMatch[0][1] + "&eid=" + eidMatch[0][1]
+				// getVideoURL turns the watch-API address into the media link.
+				files = append(files, utils.File{FileURL: getVideoURL(watchURL, cookies), FilePATH: filePath})
+			})
+		})
+	})
+	return files
+}
+
+// ExtractCMPPTs loads the courseware list page at url (sending cookies) and returns one utils.File per entry
+// that exposes a download address. Paths have the form <chapter>/<entry name>; entries without an address are skipped.
+func ExtractCMPPTs(url string, cookies string) []utils.File {
+	var files []utils.File
+	data := utils.HttpGetCookie(url, cookies)
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
+	utils.Check(err)
+	doc.Find("#coursefile > ul > li").Each(func(_ int, chapter *goquery.Selection) {
+		chapterName := utils.Format(chapter.Find("div.title.clearfix").Text())
+		// Entries listed below the chapter.
+		chapter.Find(".download-list").Each(func(_ int, entry *goquery.Selection) {
+			// The entry name is its title followed by the text of its numbered link.
+			contentName := entry.Find(".download-list-tit").Text() + entry.Find("ul > li.download-list-num > a").Text()
+			filePath := filepath.Join(chapterName, contentName)
+
+			// The address sits inside the button's onclick handler: window.open("<address>", ...).
+			onclick, _ := entry.Find("span[onclick].download-load").Attr("onclick")
+			found := utils.MatchAll(onclick, `window.open\("(.*)",`)
+			if len(found) == 0 {
+				// Indexing the empty result used to panic; skip entries without an address.
+				fmt.Printf("skipping %s: no download address found\n", filePath)
+				return
+			}
+			fileURL := found[0][1]
+			// Relative addresses are resolved against the site root. Only the prefix is tested, so an
+			// "http" occurring later in a relative address no longer makes it look absolute.
+			if !strings.HasPrefix(fileURL, "http") {
+				fileURL = "http://www.chinesemooc.org/" + fileURL
+			}
+			files = append(files, utils.File{FileURL: fileURL, FilePATH: filePath})
+		})
+	})
+	// The returned addresses must be requested with the cookie attached.
+	return files
+}
+
+// GetStartURLs loads the course page at url (sending cookies) and returns, in this order, the address of the
+// video progress page and the address of the courseware list page. Both are "" when the ids cannot be found.
+func GetStartURLs(url string, cookies string) (string, string) {
+	data := utils.HttpGetCookie(url, cookies)
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
+	utils.Check(err)
+	// The button's onclick handler carries two numeric ids separated by a comma.
+	onclick, _ := doc.Find("#top-select > div > div > button").Attr("onclick")
+	first := utils.MatchAll(onclick, `([0-9]*),`)
+	second := utils.MatchAll(onclick, `,([0-9]*)`)
+	if len(first) == 0 || len(second) == 0 {
+		// Used to be an index-out-of-range panic (presumably hit when the cookie is missing or expired).
+		fmt.Printf("can't find the course ids on %s\n", url)
+		return "", ""
+	}
+	query := "&kvideoid=" + first[0][1] + "&classesid=" + second[0][1]
+	base := "http://www.chinesemooc.org/kvideo.php?do="
+	return base + "course_progress" + query, base + "course_cware_list" + query
+}
+
diff --git a/parser/parser.go b/parser/icourse.go
similarity index 66%
rename from parser/parser.go
rename to parser/icourse.go
index 118d9aa..92badbd 100755
--- a/parser/parser.go
+++ b/parser/icourse.go
@@ -1,6 +1,7 @@
package parser
import (
+ "fmt"
"github.com/PuerkitoBio/goquery"
"icourse/config"
"icourse/download"
@@ -14,37 +15,79 @@ import (
)
//此处放置部分页面特有大的处理函数
+// DownloadIcourse downloads the icourses.cn course at url. options selects the material:
+// all, most, videoPPT, assignments, testPaper or shareResource. The function returns true on every path.
+func DownloadIcourse(url string, options string) bool {
+	id := utils.MatchAll(url, `course_([0-9]*)`)
+	if id == nil {
+		// The URL does not have the expected form; stop before any page is requested.
+		fmt.Printf("this website %s is not supported now", url)
+		return true
+	}
+	// The captured digits are the course id.
+	idNum := id[0][1]
+	courseName := utils.Format(GetIcouseName(url))
+	switch options {
+	case "all":
+		DownloadAll(idNum, courseName)
+	case "most":
+		DownloadMost(idNum, courseName)
+	case "videoPPT":
+		DownloadVideoPPT(idNum, courseName)
+	case "assignments":
+		DownloadAssignments(idNum, courseName)
+	case "testPaper":
+		DownloadTestPaper(idNum, courseName)
+	case "shareResource":
+		DownloadShareResource(idNum, courseName)
+	default: // an unrecognised option used to be ignored silently
+		fmt.Printf("unsupported download option %q for icourse\n", options)
+	}
+	return true
+}
+
+
+//获取课程名称,作为课程下载的目录
+func GetIcouseName(url string)string{
+ data:=utils.HttpGet(url)
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
+ utils.Check(err)
+ courseName := doc.Find("#introduction-body > section.container > section > section > div.course-introduction-infor.pull-left > div.course-title.clearfix").Text()
+ return courseName
+}
//不同下载选项的具体实现
-func DownloadAll(id string){
- DownloadMost(id)
- DownloadShareResource(id)
+func DownloadAll(id string,courseName string){
+ DownloadMost(id,courseName)
+ DownloadShareResource(id,courseName)
}
-func DownloadMost(id string){
- DownloadVideoPPT(id)
- DownloadAssignments(id)
- DownloadTestPaper(id)
+func DownloadMost(id string,courseName string){
+ DownloadVideoPPT(id,courseName)
+ DownloadAssignments(id,courseName)
+ DownloadTestPaper(id,courseName)
}
-func DownloadVideoPPT(id string){
+func DownloadVideoPPT(id string,courseName string){
s:=config.VideoPPT+id
files:=extractURLs(s)
- download.DownloadFiles(files)
+ download.DownloadFiles(files,courseName)
}
-func DownloadAssignments(id string){
+func DownloadAssignments(id string,courseName string){
s:=config.Assignments+id
files:=extractURLs(s)
- download.DownloadFiles(files)
+ download.DownloadFiles(files,courseName)
}
-func DownloadTestPaper(id string){
+func DownloadTestPaper(id string,courseName string){
s:=config.TestPaper+id
files:=extractURLs(s)
- download.DownloadFiles(files)
+ download.DownloadFiles(files,courseName)
}
-func DownloadShareResource(id string){
+func DownloadShareResource(id string,courseName string){
s :=config.ShareResource+id
files:=extractOthers(s)
- download.DownloadFiles(files)
+ download.DownloadFiles(files,courseName)
}
+
+
//根据sectionID,构造post请求,parentPath为前面的路径名,返回文件的数组
func getVideo(id string,parentPath string) []utils.File {
var files []utils.File
diff --git a/utils/utils.go b/utils/utils.go
index ac8be9a..372dae4 100755
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -43,7 +43,7 @@ func FileExists(filePath string) bool{
//去除字符串里的空格换行等
func Format(str string)string{
- re := regexp.MustCompile("[\r\n\t]")
+ re := regexp.MustCompile("[\r\n\t ]")
res := re.ReplaceAllString(str, "")
return res
}
@@ -74,6 +74,21 @@ func MatchAll(text, pattern string) [][]string {
return value
}
+//获取url链接的域名
+// Domain get the domain of given URL
+func Domain(url string) string {
+ domainPattern := `([a-z0-9][-a-z0-9]{0,62})\.` +
+ `(com\.cn|com\.hk|` +
+ `cn|com|net|edu|gov|biz|org|info|pro|name|xxx|xyz|be|` +
+ `me|top|cc|tv|tt)`
+ domain := MatchOneOf(url, domainPattern)
+ if domain != nil {
+ return domain[1]
+ }
+ return "Universal"
+}
+
+
//根据url,构造get请求
func HttpGet(s string) string {
res, err := http.Get(s)
@@ -86,4 +101,33 @@ func HttpGet(s string) string {
}
body, err := ioutil.ReadAll(res.Body)
return string(body)
+}
+// HttpGetCookie sends a GET request for s with the Cookie header set to cookie and returns the body as a string; a failed request or a non-200 status ends the program via log.Fatal.
+func HttpGetCookie(s string, cookie string) string {
+	reqest, err := http.NewRequest("GET", s, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	reqest.Header.Set("Cookie", cookie)
+	res, err := (&http.Client{}).Do(reqest)
+	if err != nil {
+		log.Fatal(err) // res is nil on failure; it used to be dereferenced regardless
+	}
+	defer res.Body.Close()
+	if res.StatusCode != 200 {
+		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
+	}
+	body, _ := ioutil.ReadAll(res.Body) // best effort, as before: a read error leaves the partial body
+	return string(body)
+}
+// ReadCookieFromFile loads config.Cookie from filePath when that path exists (trailing line breaks removed); otherwise config.Cookie is left unchanged.
+func ReadCookieFromFile(filePath string) {
+	if _, statErr := os.Stat(filePath); statErr == nil { // use the argument; it used to be ignored in favour of config.Cookie
+		data, err := ioutil.ReadFile(filePath)
+		Check(err)
+		for len(data) > 0 && (data[len(data)-1] == '\n' || data[len(data)-1] == '\r') {
+			data = data[:len(data)-1] // a line break is not a valid header byte and would break the Cookie header
+		}
+		config.Cookie = string(data)
+	}
+}
\ No newline at end of file