From 047e164a86a667de6cfbf448e7809ce21d100614 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 26 Mar 2020 16:44:28 +0100 Subject: [PATCH 1/5] Added a simulation flag to only extract video title and urls without any file downloaded Removed an outdated argv check because all arguments are now parsed with yargs --- destreamer.ts | 92 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/destreamer.ts b/destreamer.ts index cbc24ac..e9cadd0 100644 --- a/destreamer.ts +++ b/destreamer.ts @@ -12,20 +12,36 @@ import sanitize = require('sanitize-filename') const args: string[] = process.argv.slice(2); // TODO: Remove this const argv = yargs.options({ - videoUrls: { type: 'array', demandOption: true }, - username: { type: 'string', demandOption: true }, - outputDirectory: { type: 'string', default: 'videos' }, - format: { alias:"f", - describe: 'Expose youtube-dl --format option, for details see\n https://github.com/ytdl-org/youtube-dl/blob/master/README.md#format-selection', - type:'string', - demandOption: false - } + videoUrls: { type: 'array', demandOption: true }, + username: { type: 'string', demandOption: true }, + outputDirectory: { type: 'string', default: 'videos' }, + format: { + alias:"f", + describe: 'Expose youtube-dl --format option, for details see\n https://github.com/ytdl-org/youtube-dl/blob/master/README.md#format-selection', + type:'string', + demandOption: false + }, + simulate: { + alias: "s", + describe: "If this is set to true no video will be downloaded and the script will log the video info (default: false)", + type: "boolean", + default: false, + demandOption: false + } + }).argv; -console.info('Video URLs: %s', argv.videoUrls); -console.info('Username: %s', argv.username); -console.info('Output Directory: %s', argv.outputDirectory); -console.info('Video/Audio Quality: %s', argv.format); +if (argv.simulate){ + console.info('Video URLs: %s', argv.videoUrls); + console.info('Username: %s', 
argv.username); + term.blue("There will be no video downloaded, it's only a simulation \n") +} else { + console.info('Video URLs: %s', argv.videoUrls); + console.info('Username: %s', argv.username); + console.info('Output Directory: %s', argv.outputDirectory); + console.info('Video/Audio Quality: %s', argv.format); +} + function sanityChecks() { try { @@ -52,11 +68,8 @@ function sanityChecks() { fs.mkdirSync(argv.outputDirectory); } - if (args[0] == null || args[0].length < 10) { - console.error('Pass in video URL as first argument:\n' + - 'Example: npm start https://www.microsoftstream.com/video/6f1a382b-e20c-44c0-98fc-5608286e48bc\n'); - process.exit(-1); - } + /* Removed check on the first argoumenti not being null or + longer than 10 since we have yargs now */ } async function rentVideoForLater(videoUrls: string[], username: string, outputDirectory: string) { @@ -92,20 +105,24 @@ async function rentVideoForLater(videoUrls: string[], username: string, outputDi await sleep(4000); console.log('Looking up AMS stream locator...'); - // let amp: any; + let document: any; const amsUrl = await page.evaluate( // maybe there should be some check in case the url fetch fails - () => { return document?.querySelector(".azuremediaplayer")?.player?.cache_?.src; } + () => { + return document?.querySelector(".azuremediaplayer")?.player?.cache_?.src; + } ); // console.log(`Video url is: ${amsUrl}`); - console.log('Fetching title'); + console.log('Fetching title...'); let title = await page.evaluate( // Using optional chaining to return handle null case, generating default name - () => { return document?.querySelector(".title")?.textContent?.trim() ?? - `Video${videoUrls.indexOf(videoUrl)}`; } + () => { + return document?.querySelector(".title")?.textContent?.trim() ?? 
+ `Video${videoUrls.indexOf(videoUrl)}`; + } ); // Implemented sanitize-filename as suggested in issue #11 @@ -114,22 +131,31 @@ async function rentVideoForLater(videoUrls: string[], username: string, outputDi if (title == "") title = `Video${videoUrls.indexOf(videoUrl)}` - console.log(`Video title is: ${title}`); + //console.log(`Video title is: ${title}`); console.log('Constructing HLS URL...'); const hlsUrl = amsUrl.substring(0, amsUrl.lastIndexOf('/')) + '/manifest(format=m3u8-aapl)'; - console.log('Spawning youtube-dl with cookie and HLS URL...'); - let format = '' - if (argv.format) { - format = `-f "${argv.format}"` + // If the simulate flag is true skip the download + if (!argv.simulate) { + console.log('Spawning youtube-dl with cookie and HLS URL...'); + let format = '' + if (argv.format) { + format = `-f "${argv.format}"` + } + + const youtubedlCmd = 'youtube-dl --no-call-home --no-warnings ' + format + + ` --output "${outputDirectory}/${title}.mp4" --add-header Cookie:"${cookie}" "${hlsUrl}"` + + // console.log(`\n\n[DEBUG] Invoking youtube-dl: ${youtubedlCmd}\n\n`); + var result = execSync(youtubedlCmd, { stdio: 'inherit' }); + } else { + // Logging the video info + term.blue("Video title is: ") + console.log(`${title}`) + term.blue("Video url is: ") + console.log(`${hlsUrl}`) } - - const youtubedlCmd = 'youtube-dl --no-call-home --no-warnings ' + format + - ` --output "${outputDirectory}/${title}.mp4" --add-header Cookie:"${cookie}" "${hlsUrl}"` - - // console.log(`\n\n[DEBUG] Invoking youtube-dl: ${youtubedlCmd}\n\n`); - var result = execSync(youtubedlCmd, { stdio: 'inherit' }); } console.log("At this point Chrome's job is done, shutting it down..."); From 1b6ff66dee69c02fcf7fe97b1cb5f89f0ba4e9b9 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 26 Mar 2020 21:40:56 +0100 Subject: [PATCH 2/5] added axios package --- package-lock.json | 31 +++++++++++++++++++++++++++++++ package.json | 1 + 2 files changed, 32 insertions(+) diff --git a/package-lock.json 
b/package-lock.json index d8cf839..dd11a4e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -82,6 +82,14 @@ "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==" }, + "axios": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.19.2.tgz", + "integrity": "sha512-fjgm5MvRHLhx+osE2xoekY70AhARk3a6hkN+3Io1jc00jtquGvxYlKlsFUhmUET0V5te6CcZI7lcv2Ym61mjHA==", + "requires": { + "follow-redirects": "1.5.10" + } + }, "balanced-match": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", @@ -262,6 +270,29 @@ "path-exists": "^4.0.0" } }, + "follow-redirects": { + "version": "1.5.10", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz", + "integrity": "sha512-0V5l4Cizzvqt5D44aTXbFZz+FtyXV1vrDN6qrelxtfYQKW0KO0W2T/hkE8xvGa/540LkZlkaUjO4ailYTFtHVQ==", + "requires": { + "debug": "=3.1.0" + }, + "dependencies": { + "debug": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", + "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", + "requires": { + "ms": "2.0.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + } + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", diff --git a/package.json b/package.json index a6a1e08..7b20e83 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "@types/yargs": "^15.0.3" }, "dependencies": { + "axios": "^0.19.2", "puppeteer": "^2.1.1", "sanitize-filename": "^1.6.3", "terminal-kit": "^1.35.2", From ec407d1e9bae1eed12652bf901823bcb312cbc88 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 26 Mar 2020 21:43:21 
+0100 Subject: [PATCH 3/5] minor change in the page loading created function that fetches title and hlsUrl from the Microsoft api after being authenticated with the token fetched from the page --- destreamer.ts | 91 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/destreamer.ts b/destreamer.ts index e9cadd0..7b84b8f 100644 --- a/destreamer.ts +++ b/destreamer.ts @@ -5,10 +5,16 @@ import fs from 'fs'; import path from 'path'; import { BrowserTests } from './BrowserTests'; import yargs = require('yargs'); -import sanitize = require('sanitize-filename') +import sanitize = require('sanitize-filename'); +import axios from 'axios'; + + +/** + * exitCode 25 = cannot split videoID from videUrl + * exitCode 27 = no hlsUrl in the api + * exitCode 88 = error extracting cookies + */ -// Type in your username here (the one you use to -// login to Microsoft Stream). const args: string[] = process.argv.slice(2); // TODO: Remove this const argv = yargs.options({ @@ -69,7 +75,7 @@ function sanityChecks() { } /* Removed check on the first argoumenti not being null or - longer than 10 since we have yargs now */ + longer than 10 since we use yargs now */ } async function rentVideoForLater(videoUrls: string[], username: string, outputDirectory: string) { @@ -84,7 +90,7 @@ async function rentVideoForLater(videoUrls: string[], username: string, outputDi // This breaks on slow connections, needs more reliable logic //const oidcUrl = 
"https://login.microsoftonline.com/common/oauth2/authorize?client_id=cf53fce8-def6-4aeb-8d30-b158e7b1cf83&response_mode=form_post&response_type=code+id_token&scope=openid+profile&state=OpenIdConnect.AuthenticationProperties%3d1VtrsKV5QUHtzn8cDWL4wJmacu-VHH_DfpPxMQBhnfbar-_e8X016GGJDPfqfvcyUK3F3vBoiFwUpahR2ANfrzHE469vcw7Mk86wcAqBGXCvAUmv59MDU_OZFHpSL360oVRBo84GfVXAKYdhCjhPtelRHLHEM_ADiARXeMdVTAO3SaTiVQMhw3c9vLWuXqrKKevpI7E5esCQy5V_dhr2Q7kKrlW3gHX0232b8UWAnSDpc-94&nonce=636832485747560726.NzMyOWIyYWQtM2I3NC00MmIyLTg1NTMtODBkNDIwZTI1YjAxNDJiN2JkNDMtMmU5Ni00OTc3LWFkYTQtNTNlNmUwZmM1NTVl&nonceKey=OpenIdConnect.nonce.F1tPks6em0M%2fWMwvatuGWfFM9Gj83LwRKLvbx9rYs5M%3d&site_id=500453&redirect_uri=https%3a%2f%2fmsit.microsoftstream.com%2f&post_logout_redirect_uri=https%3a%2f%2fproducts.office.com%2fmicrosoft-stream&msafed=0"; - await page.goto(videoUrls[0], { waitUntil: 'networkidle2' }); + await page.goto(videoUrls[0], { waitUntil: "networkidle2" }); await page.waitForSelector('input[type="email"]'); await page.keyboard.type(username); await page.click('input[type="submit"]'); @@ -95,7 +101,11 @@ async function rentVideoForLater(videoUrls: string[], username: string, outputDi console.log('Sorry, i mean "you".'); for (let videoUrl of videoUrls) { - await page.goto(videoUrl, { waitUntil: 'networkidle2' }); + let videoID = videoUrl.split('/').pop() ?? (console.error("Couldn't split the videoID, wrong url"), process.exit(25)) + + // changed waitUntil value to load (page completly loaded) + await page.goto(videoUrl, { waitUntil: 'load' }); + await sleep(2000); // try this instead of hardcoding sleep // https://github.com/GoogleChrome/puppeteer/issues/3649 @@ -104,39 +114,20 @@ async function rentVideoForLater(videoUrls: string[], username: string, outputDi console.log('Got cookie. 
Consuming cookie...'); await sleep(4000); - console.log('Looking up AMS stream locator...'); + console.log("Accessing API..."); - let document: any; - const amsUrl = await page.evaluate( - // maybe there should be some check in case the url fetch fails + let sessionInfo: any; + var accesToken = await page.evaluate( () => { - return document?.querySelector(".azuremediaplayer")?.player?.cache_?.src; + return sessionInfo.AccessToken; } ); - // console.log(`Video url is: ${amsUrl}`); - console.log('Fetching title...'); + console.log("Fetching title and HLS URL...") + var [title, hlsUrl] = await getVideoInfo(videoID, accesToken) - let title = await page.evaluate( - // Using optional chaining to return handle null case, generating default name - () => { - return document?.querySelector(".title")?.textContent?.trim() ?? - `Video${videoUrls.indexOf(videoUrl)}`; - } - ); + title = (sanitize(title) == "") ? `Video${videoUrls.indexOf(videoUrl)}` : sanitize(title) - // Implemented sanitize-filename as suggested in issue #11 - title = sanitize(title) - - if (title == "") - title = `Video${videoUrls.indexOf(videoUrl)}` - - //console.log(`Video title is: ${title}`); - - console.log('Constructing HLS URL...'); - const hlsUrl = amsUrl.substring(0, amsUrl.lastIndexOf('/')) + '/manifest(format=m3u8-aapl)'; - - // If the simulate flag is true skip the download if (!argv.simulate) { console.log('Spawning youtube-dl with cookie and HLS URL...'); let format = '' @@ -186,6 +177,42 @@ async function exfiltrateCookie(page: puppeteer.Page) { return `Authorization=${authzCookie.value}; Signature=${sigCookie.value}`; } + +async function getVideoInfo(videoID: string, accesToken: string) { + let title: string; + let hlsUrl: string; + + let content = axios.get( + `https://euwe-1.api.microsoftstream.com/api/videos/${videoID}?$expand=creator,tokens,status,liveEvent,extensions&api-version=1.3-private`, + { + headers: { + Authorization: "Bearer " + accesToken + } + }) + .then(function (response) { + 
return response.data + }) + .catch(function (error) { + console.error(error) + }) + + + title = await content.then(data => { + return data["name"]; + }) + + hlsUrl = await content.then(data => { + for (const item of data["playbackUrls"]) { + if (item["mimeType"] == "application/vnd.apple.mpegurl") + return item["playbackUrl"] + } + console.error("Error fetching hlsUrl") + process.exit(27) + }) + + return [title, hlsUrl]; +} + // We should probably use Mocha or something if (args[0] === 'test') { From 089fec600120ecf227b3ae55b4c1633006f8f561 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 29 Mar 2020 21:41:53 +0200 Subject: [PATCH 4/5] fixed the import syntax now the simulation flag properly translates into a youtube-dl flag too --- destreamer.ts | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/destreamer.ts b/destreamer.ts index 7b84b8f..f3605ab 100644 --- a/destreamer.ts +++ b/destreamer.ts @@ -4,14 +4,14 @@ import { terminal as term } from 'terminal-kit'; import fs from 'fs'; import path from 'path'; import { BrowserTests } from './BrowserTests'; -import yargs = require('yargs'); -import sanitize = require('sanitize-filename'); +import yargs from 'yargs' +import sanitize from 'sanitize-filename' import axios from 'axios'; /** * exitCode 25 = cannot split videoID from videUrl * exitCode 27 = no hlsUrl in the API response * exitCode 88 = error extracting cookies */ @@ -41,11 +41,13 @@ if (argv.simulate){ console.info('Video URLs: %s', argv.videoUrls); console.info('Username: %s', argv.username); term.blue("There will be no video downloaded, it's only a simulation \n") + console.log("\n") } else { console.info('Video URLs: %s', argv.videoUrls); console.info('Username: %s', argv.username); console.info('Output Directory: %s', argv.outputDirectory); console.info('Video/Audio Quality: %s', argv.format); + console.log("\n") } @@ -128,25 +130,22 @@ async function 
rentVideoForLater(videoUrls: string[], username: string, outputDi title = (sanitize(title) == "") ? `Video${videoUrls.indexOf(videoUrl)}` : sanitize(title) - if (!argv.simulate) { - console.log('Spawning youtube-dl with cookie and HLS URL...'); - let format = '' - if (argv.format) { - format = `-f "${argv.format}"` - } + term.blue("Video title is: ") + console.log(`${title} \n`) - const youtubedlCmd = 'youtube-dl --no-call-home --no-warnings ' + format + - ` --output "${outputDirectory}/${title}.mp4" --add-header Cookie:"${cookie}" "${hlsUrl}"` + console.log('Spawning youtube-dl with cookie and HLS URL...'); - // console.log(`\n\n[DEBUG] Invoking youtube-dl: ${youtubedlCmd}\n\n`); - var result = execSync(youtubedlCmd, { stdio: 'inherit' }); - } else { - // Logging the video info - term.blue("Video title is: ") - console.log(`${title}`) - term.blue("Video url is: ") - console.log(`${hlsUrl}`) - } + const format = argv.format ? `-f "${argv.format}"` : "" + + var youtubedlCmd = 'youtube-dl --no-call-home --no-warnings ' + format + + ` --output "${outputDirectory}/${title}.mp4" --add-header ` + + `Cookie:"${cookie}" "${hlsUrl}"` + + if (argv.simulate) + youtubedlCmd = youtubedlCmd + " -s" + + // console.log(`\n\n[DEBUG] Invoking youtube-dl: ${youtubedlCmd}\n\n`); + var result = execSync(youtubedlCmd, { stdio: 'inherit' }); } console.log("At this point Chrome's job is done, shutting it down..."); From 0cb8594d0fe0b1b07aaea4c229cc50493270e9ea Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 29 Mar 2020 22:56:01 +0200 Subject: [PATCH 5/5] added verbose option for additional information fixed the response error printing --- destreamer.ts | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/destreamer.ts b/destreamer.ts index f3605ab..8741574 100644 --- a/destreamer.ts +++ b/destreamer.ts @@ -12,9 +12,11 @@ import axios from 'axios'; /** * exitCode 25 = cannot split videoID from videUrl * exitCode 27 = no hlsUrl in the API 
response + * exitCode 29 = invalid response from API * exitCode 88 = error extracting cookies */ +const ApiVersion = "1.3-private" const args: string[] = process.argv.slice(2); // TODO: Remove this const argv = yargs.options({ @@ -23,18 +25,27 @@ const argv = yargs.options({ outputDirectory: { type: 'string', default: 'videos' }, format: { alias:"f", - describe: 'Expose youtube-dl --format option, for details see\n https://github.com/ytdl-org/youtube-dl/blob/master/README.md#format-selection', + describe: `Expose youtube-dl --format option, for details see\n + https://github.com/ytdl-org/youtube-dl/blob/master/README.md#format-selection`, type:'string', demandOption: false }, simulate: { alias: "s", - describe: "If this is set to true no video will be downloaded and the script will log the video info (default: false)", + describe: `If this is set to true no video will be downloaded and the script + will log the video info (default: false)`, + type: "boolean", + default: false, + demandOption: false + }, + verbose: { + alias: "v", + describe: `Print additional informations to the console + (don't use this if you don't need/ not told to)`, type: "boolean", default: false, demandOption: false } - }).argv; if (argv.simulate){ @@ -182,17 +193,24 @@ async function getVideoInfo(videoID: string, accesToken: string) { let hlsUrl: string; let content = axios.get( - `https://euwe-1.api.microsoftstream.com/api/videos/${videoID}?$expand=creator,tokens,status,liveEvent,extensions&api-version=1.3-private`, + `https://euwe-1.api.microsoftstream.com/api/videos/${videoID}` + + `?$expand=creator,tokens,status,liveEvent,extensions&api-version=${ApiVersion}`, { headers: { - Authorization: "Bearer " + accesToken + Authorization: `Bearer ${accesToken}` } }) .then(function (response) { return response.data }) .catch(function (error) { - console.error(error) + term.red("ERROR ") + console.error(error.response.status) + console.error("Exiting...") + if (argv.verbose) + console.error(error) 
+ + process.exit(29) }) @@ -201,6 +219,9 @@ async function getVideoInfo(videoID: string, accesToken: string) { }) hlsUrl = await content.then(data => { + if (argv.verbose) + console.log(JSON.stringify(data, undefined, 2)) + for (const item of data["playbackUrls"]) { if (item["mimeType"] == "application/vnd.apple.mpegurl") return item["playbackUrl"]