Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: #5

Open
djoudi opened this issue Nov 14, 2020 · 2 comments

Comments

@djoudi
Copy link

djoudi commented Nov 14, 2020

Course : adobe-illustrator-cc-2019-creer-et-vendre-des-t-shirts-sur-teespring-avec-facebook
Traceback (most recent call last):
File "main.py", line 107, in <module>
get_course(url)
File "main.py", line 95, in get_course
os.mkdir(course_name + "/" +chapter)
OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'adobe-illustrator-cc-2019-creer-et-vendre-des-t-shirts-sur-teespring-avec-facebook/\n1. Présentation de la formation\n

@djoudi
Copy link
Author

djoudi commented Nov 15, 2020

@yhamidullah

@juhnny5
Copy link

juhnny5 commented Jan 5, 2021

Tu peux remplacer le contenu du fichier main.py par:

# Module metadata (authorship, license, version, contact, status).
# Informational only: nothing in the visible part of this file reads these names.
__author__ = "HAMIDULLAH Yasser"
__copyright__ = "Copyright 2020, MadaGeeksCar"
__credits__ = ["HAMIDULLAH Yasser"]
__license__ = "MIT"
__version__ = "1.0.1"
__maintainer__ = "HAMIDULLAH Yasser"
__email__ = "[email protected]"
__status__ = "Debug" 


from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import urllib.request,re,os,sys
from driver_support import *
from config import *

# Get the available driver. get_available_driver is not defined in this file
# (presumably it comes from the driver_support star import); the check below
# treats an empty string as "no driver found".
driver = get_available_driver()
if driver == "":
    print("can't find any available driver")
    # Exit with a non-zero status so a calling shell/script can tell the run failed.
    sys.exit(1)

LOGIN_URL = "https://www.alphorm.com/account/login"


def login(username, password):
    """Open the login page and submit the given credentials.

    Loads LOGIN_URL in the module-level ``driver``, types ``username`` and
    ``password`` into the form fields and clicks the login button. The
    function does not wait for, or verify, a successful login.

    Args:
        username: Text typed into the element with id ``username``.
        password: Text typed into the element with id ``inputPassword1``.
    """
    # Load the login page.
    driver.get(LOGIN_URL)

    # Fill the username and password fields. The find_element(By.…) form is
    # used because the find_element_by_* helpers were removed in Selenium 4.
    driver.find_element(By.ID, "username").send_keys(username)
    driver.find_element(By.ID, "inputPassword1").send_keys(password)

    # Find and click the login button.
    driver.find_element(By.CLASS_NAME, "btn-connect").click()
# Translation table used by clean_text: characters that Windows rejects in
# file/directory names become "_", control characters (newline, tab, ...)
# become a space, and double quotes are dropped.
_FILENAME_CHAR_TABLE = {ord(char): "_" for char in '/\\:*?<>|'}
_FILENAME_CHAR_TABLE.update({code: " " for code in range(32)})
_FILENAME_CHAR_TABLE[ord('"')] = None


def clean_text(text):
    """Return ``text`` made safe for use as a single file or folder name.

    ``/`` and ``:`` are replaced by ``_`` and ``"`` is removed, as before.
    In addition, the remaining characters Windows forbids in names
    (``\\ * ? < > |``) are replaced by ``_``, control characters such as
    newlines are replaced by a space, and surrounding whitespace is stripped.
    A stray newline in a scraped title otherwise makes ``os.mkdir`` fail with
    WinError 123.

    Args:
        text: Raw title string.

    Returns:
        The sanitized string (may be empty).
    """
    return text.translate(_FILENAME_CHAR_TABLE).strip()

def get_course(url):
    """Download every lesson video of the course at ``url``.

    Files are written to ``<course>/<chapter>/<n>-<lesson>.mp4`` relative to
    the current working directory, where ``n`` counts the lessons of each
    chapter starting at 1. Uses the module-level ``driver`` to browse the
    page and ``clean_text`` to sanitize chapter and lesson titles.

    Args:
        url: Course page URL. The course folder name is taken from the fifth
            "/"-separated segment of the URL, minus its first 19 characters.

    Raises:
        IndexError: If the URL has fewer than five segments, or an expected
            page element (tab, active lesson, video tag) is missing.
    """
    # Load the course page.
    driver.get(url)

    # Get the course name.
    # NOTE(review): the [19:] slice presumably skips a fixed-length prefix in
    # the URL segment -- confirm against the site's URL scheme.
    course_name = url.split("/")[4][19:]
    print("Course : ",course_name)

    # Create the course folder if it does not exist yet.
    os.makedirs(course_name, exist_ok=True)

    # Look up the "plan détaillé" tab.
    # NOTE(review): the element is located but never clicked; the lookup is
    # kept as-is (it still raises IndexError when the tab is absent). Confirm
    # whether a .click() was intended.
    driver.find_elements(By.CLASS_NAME, "title-tab-tuto")[1]

    # Get the lesson list. find_elements(By.…) is used because the
    # find_elements_by_* helpers were removed in Selenium 4.
    lessons = driver.find_elements(By.CLASS_NAME, "video_plan")

    # Per-chapter lesson counters. Counting per chapter name (instead of
    # resetting only when a folder is newly created) keeps the numbering
    # correct when the script is re-run over existing folders.
    lesson_counts = {}

    for lesson_element in lessons:
        # Wait (up to 100 s) until the lesson entries are present, so the
        # page has loaded its features before clicking.
        try:
            WebDriverWait(driver, 100).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'video_plan')))
        except TimeoutException:
            print ("Loading took too much time!")

        # Click on the lesson to make it the active one.
        lesson_element.click()

        # Parse the current page source for inspection.
        page_soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Find the active lesson entry once; its title and the title of its
        # chapter are both read from it.
        active_entry = page_soup.find_all('div', class_="menu_point active")[0]
        lesson = active_entry.div.div.a.text
        chapter = active_entry.parent.p.text

        # Get the video link for download.
        video_link = page_soup.find_all('video', class_="jw-video jw-reset")[0]['src']

        # Sanitize the titles for use as file names. The former re.sub call
        # was dropped: its pattern required a literal "1234567890" prefix, so
        # it never matched a real title, and it contained an invalid escape.
        chapter = clean_text(chapter.strip()).strip()
        lesson = clean_text(lesson.strip())

        # Create the chapter folder if it does not exist yet.
        chapter_dir = os.path.join(course_name, chapter)
        os.makedirs(chapter_dir, exist_ok=True)

        vid_number = lesson_counts.get(chapter, 0) + 1
        lesson_counts[chapter] = vid_number

        # Download and save to the chapter folder.
        print("Downloading : ",chapter,"/",lesson,".mp4")
        target_path = os.path.join(chapter_dir, "{}-{}.mp4".format(vid_number, lesson))
        urllib.request.urlretrieve(video_link, target_path)

    print("Finished")
# Log in once, then download each configured course. `email`, `password` and
# `courses` are not defined in this file (presumably they come from the
# config star import).
try:
    login(email, password)

    for url in courses:
        get_course(url)
finally:
    # Always close the browser, even when a download fails, so no driver
    # process is left running.
    driver.quit()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants