diff --git a/.gitignore b/.gitignore index 1c0c3bd..d14b7cd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,10 +3,12 @@ *.log *.js *.zip +*.xml + +yarn.lock .chrome_data node_modules videos release build -yarn.lock \ No newline at end of file diff --git a/src/ApiClient.ts b/src/ApiClient.ts index ac7a7b6..ace1704 100644 --- a/src/ApiClient.ts +++ b/src/ApiClient.ts @@ -1,16 +1,16 @@ import { logger } from './Logger'; -import { Session } from './Types'; +import { StreamSession } from './Types'; import axios, { AxiosRequestConfig, AxiosResponse, AxiosInstance, AxiosError } from 'axios'; import axiosRetry, { isNetworkOrIdempotentRequestError } from 'axios-retry'; -export class ApiClient { - private static instance: ApiClient; +export class StreamApiClient { + private static instance: StreamApiClient; private axiosInstance?: AxiosInstance; - private session?: Session; + private session?: StreamSession; - private constructor(session?: Session) { + private constructor(session?: StreamSession) { this.session = session; this.axiosInstance = axios.create({ baseURL: session?.ApiGatewayUri, @@ -50,16 +50,16 @@ export class ApiClient { * * @param session used if initializing */ - public static getInstance(session?: Session): ApiClient { - if (!ApiClient.instance) { - ApiClient.instance = new ApiClient(session); + public static getInstance(session?: StreamSession): StreamApiClient { + if (!StreamApiClient.instance) { + StreamApiClient.instance = new StreamApiClient(session); } - return ApiClient.instance; + return StreamApiClient.instance; } - public setSession(session: Session): void { - if (!ApiClient.instance) { + public setSession(session: StreamSession): void { + if (!StreamApiClient.instance) { logger.warn("Trying to update ApiCient session when it's not initialized!"); } diff --git a/src/CommandLineParser.ts b/src/CommandLineParser.ts index b2ac1ee..694926a 100644 --- a/src/CommandLineParser.ts +++ b/src/CommandLineParser.ts @@ -1,5 +1,5 @@ import { CLI_ERROR, ERROR_CODE 
} from './Errors'; -import { checkOutDir } from './Utils'; +import { makeOutDir } from './Utils'; import { logger } from './Logger'; import { templateElements } from './Types'; @@ -9,7 +9,7 @@ import sanitize from 'sanitize-filename'; import yargs from 'yargs'; -export const argv: any = yargs.options({ +export const argv = yargs.options({ username: { alias: 'u', type: 'string', @@ -114,7 +114,7 @@ export const argv: any = yargs.options({ .check(() => noArguments()) .check((argv: any) => checkInputConflicts(argv.videoUrls, argv.inputFile)) .check((argv: any) => { - if (checkOutDir(argv.outputDirectory)) { + if (makeOutDir(argv.outputDirectory)) { return true; } else { diff --git a/src/Downloaders.ts b/src/Downloaders.ts new file mode 100644 index 0000000..bb7bdaa --- /dev/null +++ b/src/Downloaders.ts @@ -0,0 +1,172 @@ +import { StreamApiClient } from './ApiClient'; +import { argv } from './CommandLineParser'; +import { ERROR_CODE } from './Errors'; +import { logger } from './Logger'; +import { doStreamLogin } from './LoginModules'; +import { drawThumbnail } from './Thumbnail'; +import { refreshSession, TokenCache } from './TokenCache'; +import { StreamVideo, VideoUrl } from './Types'; +import { ffmpegTimemarkToChunk } from './Utils'; +import { createUniquePath, getStreamInfo } from './VideoUtils'; + +import cliProgress from 'cli-progress'; +import fs from 'fs'; + + +const { FFmpegCommand, FFmpegInput, FFmpegOutput } = require('@tedconf/fessonia')(); +const tokenCache: TokenCache = new TokenCache(); + + +export async function downloadStreamVideo(videoUrls: Array): Promise { + + let session = tokenCache.Read() ?? await doStreamLogin('https://web.microsoftstream.com/', tokenCache, argv.username); + logger.verbose('Session and API info \n' + + '\t API Gateway URL: '.cyan + session.ApiGatewayUri + '\n' + + '\t API Gateway version: '.cyan + session.ApiGatewayVersion + '\n'); + + + logger.info('Fetching videos info... 
\n'); + + const videos: Array = createUniquePath( + await getStreamInfo(videoUrls, session, argv.closedCaptions), + argv.outputTemplate, argv.format, argv.skip + ); + + if (argv.simulate) { + videos.forEach((video: StreamVideo) => { + logger.info( + '\nTitle: '.green + video.title + + '\nOutPath: '.green + video.outPath + + '\nPublished Date: '.green + video.publishDate + + '\nPlayback URL: '.green + video.playbackUrl + + ((video.captionsUrl) ? ('\nCC URL: '.green + video.captionsUrl) : '') + ); + }); + + return; + } + + for (const [index, video] of videos.entries()) { + + if (argv.skip && fs.existsSync(video.outPath)) { + logger.info(`File already exists, skipping: ${video.outPath} \n`); + continue; + } + + if (argv.keepLoginCookies && index !== 0) { + logger.info('Trying to refresh token...'); + session = await refreshSession('https://web.microsoftstream.com/video/' + video.guid); + StreamApiClient.getInstance().setSession(session); + } + + const pbar: cliProgress.SingleBar = new cliProgress.SingleBar({ + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + format: 'progress [{bar}] {percentage}% {speed} {eta_formatted}', + // process.stdout.columns may return undefined in some terminals (Cygwin/MSYS) + barsize: Math.floor((process.stdout.columns || 30) / 3), + stopOnComplete: true, + hideCursor: true, + }); + + logger.info(`\nDownloading Video: ${video.title} \n`); + logger.verbose('Extra video info \n' + + '\t Video m3u8 playlist URL: '.cyan + video.playbackUrl + '\n' + + '\t Video tumbnail URL: '.cyan + video.posterImageUrl + '\n' + + '\t Video subtitle URL (may not exist): '.cyan + video.captionsUrl + '\n' + + '\t Video total chunks: '.cyan + video.totalChunks + '\n'); + + logger.info('Spawning ffmpeg with access token and HLS URL. 
This may take a few seconds...\n\n'); + if (!process.stdout.columns) { + logger.warn( + 'Unable to get number of columns from terminal.\n' + + 'This happens sometimes in Cygwin/MSYS.\n' + + 'No progress bar can be rendered, however the download process should not be affected.\n\n' + + 'Please use PowerShell or cmd.exe to run destreamer on Windows.' + ); + } + + const headers: string = 'Authorization: Bearer ' + session.AccessToken; + + if (!argv.noExperiments) { + if (video.posterImageUrl) { + await drawThumbnail(video.posterImageUrl, session); + } + } + + const ffmpegInpt: any = new FFmpegInput(video.playbackUrl, new Map([ + ['headers', headers] + ])); + const ffmpegOutput: any = new FFmpegOutput(video.outPath, new Map([ + argv.acodec === 'none' ? ['an', null] : ['c:a', argv.acodec], + argv.vcodec === 'none' ? ['vn', null] : ['c:v', argv.vcodec], + ['n', null] + ])); + const ffmpegCmd: any = new FFmpegCommand(); + + const cleanupFn: () => void = () => { + pbar.stop(); + + if (argv.noCleanup) { + return; + } + + try { + fs.unlinkSync(video.outPath); + } + catch (e) { + // Future handling of an error (maybe) + } + }; + + pbar.start(video.totalChunks, 0, { + speed: '0' + }); + + // prepare ffmpeg command line + ffmpegCmd.addInput(ffmpegInpt); + ffmpegCmd.addOutput(ffmpegOutput); + if (argv.closedCaptions && video.captionsUrl) { + const captionsInpt: any = new FFmpegInput(video.captionsUrl, new Map([ + ['headers', headers] + ])); + + ffmpegCmd.addInput(captionsInpt); + } + + ffmpegCmd.on('update', async (data: any) => { + const currentChunks: number = ffmpegTimemarkToChunk(data.out_time); + + pbar.update(currentChunks, { + speed: data.bitrate + }); + + // Graceful fallback in case we can't get columns (Cygwin/MSYS) + if (!process.stdout.columns) { + process.stdout.write(`--- Speed: ${data.bitrate}, Cursor: ${data.out_time}\r`); + } + }); + + process.on('SIGINT', cleanupFn); + + // let the magic begin... 
+ await new Promise((resolve: any) => { + ffmpegCmd.on('error', (error: any) => { + cleanupFn(); + + logger.error(`FFmpeg returned an error: ${error.message}`); + process.exit(ERROR_CODE.UNK_FFMPEG_ERROR); + }); + + ffmpegCmd.on('success', () => { + pbar.update(video.totalChunks); // set progress bar to 100% + logger.info(`\nDownload finished: ${video.outPath} \n`); + resolve(); + }); + + ffmpegCmd.spawn(); + }); + + process.removeListener('SIGINT', cleanupFn); + } +} diff --git a/src/LoginModules.ts b/src/LoginModules.ts new file mode 100644 index 0000000..2af59bb --- /dev/null +++ b/src/LoginModules.ts @@ -0,0 +1,84 @@ +import { logger } from './Logger'; +import puppeteer from 'puppeteer'; +import { getPuppeteerChromiumPath } from './PuppeteerHelper'; +import { chromeCacheFolder } from './destreamer'; +import { argv } from './CommandLineParser'; +import { StreamSession } from './Types'; +import { ERROR_CODE } from './Errors'; +import { TokenCache } from './TokenCache'; + + +export async function doStreamLogin(url: string, tokenCache: TokenCache, username?: string): Promise { + + logger.info('Launching headless Chrome to perform the OpenID Connect dance...'); + + const browser: puppeteer.Browser = await puppeteer.launch({ + executablePath: getPuppeteerChromiumPath(), + headless: false, + userDataDir: (argv.keepLoginCookies) ? chromeCacheFolder : undefined, + args: [ + '--disable-dev-shm-usage', + '--fast-start', + '--no-sandbox' + ] + }); + const page: puppeteer.Page = (await browser.pages())[0]; + + logger.info('Navigating to login page...'); + await page.goto(url, { waitUntil: 'load' }); + + try { + if (username) { + await page.waitForSelector('input[type="email"]', { timeout: 3000 }); + await page.keyboard.type(username); + await page.click('input[type="submit"]'); + } + else { + /* If a username was not provided we let the user take actions that + lead up to the video page. 
*/ + } + } + catch (e) { + /* If there is no email input selector we aren't in the login module, + we are probably using the cache to aid the login. + It could finish the login on its own if the user said 'yes' when asked to + remember the credentials or it could still prompt the user for a password */ + } + + await browser.waitForTarget((target: puppeteer.Target) => target.url().endsWith('microsoftstream.com/'), { timeout: 150000 }); + logger.info('We are logged in.'); + + let session: StreamSession | null = null; + let tries = 1; + while (!session) { + try { + let sessionInfo: any; + session = await page.evaluate( + () => { + return { + AccessToken: sessionInfo.AccessToken, + ApiGatewayUri: sessionInfo.ApiGatewayUri, + ApiGatewayVersion: sessionInfo.ApiGatewayVersion + }; + } + ); + } + catch (error) { + if (tries > 5) { + process.exit(ERROR_CODE.NO_SESSION_INFO); + } + + session = null; + tries++; + await page.waitForTimeout(3000); + } + } + + tokenCache.Write(session); + logger.info('Wrote access token to token cache.'); + logger.info("At this point Chromium's job is done, shutting it down...\n"); + + await browser.close(); + + return session; +} diff --git a/src/Thumbnail.ts b/src/Thumbnail.ts index 0502977..f65c0e2 100644 --- a/src/Thumbnail.ts +++ b/src/Thumbnail.ts @@ -1,12 +1,12 @@ -import { ApiClient } from './ApiClient'; -import { Session } from './Types'; +import { StreamApiClient } from './ApiClient'; +import { StreamSession } from './Types'; import terminalImage from 'terminal-image'; import { AxiosResponse } from 'axios'; -export async function drawThumbnail(posterImage: string, session: Session): Promise { - const apiClient: ApiClient = ApiClient.getInstance(session); +export async function drawThumbnail(posterImage: string, session: StreamSession): Promise { + const apiClient: StreamApiClient = StreamApiClient.getInstance(session); const thumbnail: Buffer = await apiClient.callUrl(posterImage, 'get', null, 'arraybuffer') .then((response: 
AxiosResponse | undefined) => response?.data); diff --git a/src/TokenCache.ts b/src/TokenCache.ts index 2a3a0e4..363a651 100644 --- a/src/TokenCache.ts +++ b/src/TokenCache.ts @@ -2,7 +2,7 @@ import { chromeCacheFolder } from './destreamer'; import { ERROR_CODE } from './Errors'; import { logger } from './Logger'; import { getPuppeteerChromiumPath } from './PuppeteerHelper'; -import { Session } from './Types'; +import { StreamSession } from './Types'; import fs from 'fs'; import jwtDecode from 'jwt-decode'; @@ -12,14 +12,14 @@ import puppeteer from 'puppeteer'; export class TokenCache { private tokenCacheFile = '.token_cache'; - public Read(): Session | null { + public Read(): StreamSession | null { if (!fs.existsSync(this.tokenCacheFile)) { logger.warn(`${this.tokenCacheFile} not found. \n`); return null; } - const session: Session = JSON.parse(fs.readFileSync(this.tokenCacheFile, 'utf8')); + const session: StreamSession = JSON.parse(fs.readFileSync(this.tokenCacheFile, 'utf8')); type Jwt = { [key: string]: any @@ -41,7 +41,7 @@ export class TokenCache { return session; } - public Write(session: Session): void { + public Write(session: StreamSession): void { const s: string = JSON.stringify(session, null, 4); fs.writeFile(this.tokenCacheFile, s, (err: any) => { if (err) { @@ -54,7 +54,7 @@ export class TokenCache { } -export async function refreshSession(url: string): Promise { +export async function refreshSession(url: string): Promise { const videoId: string = url.split('/').pop() ?? 
process.exit(ERROR_CODE.INVALID_VIDEO_GUID); const browser: puppeteer.Browser = await puppeteer.launch({ @@ -73,7 +73,7 @@ export async function refreshSession(url: string): Promise { await browser.waitForTarget((target: puppeteer.Target) => target.url().includes(videoId), { timeout: 30000 }); - let session: Session | null = null; + let session: StreamSession | null = null; let tries = 1; while (!session) { diff --git a/src/Types.ts b/src/Types.ts index b477354..1ec0c05 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -1,11 +1,27 @@ -export type Session = { +export type StreamSession = { AccessToken: string; ApiGatewayUri: string; ApiGatewayVersion: string; } -export type Video = { +export type VideoUrl = { + url: string, + outDir: string +} + + +export type SharepointVideo = { + // if we can download the MP4 or we need to use DASH + direct: boolean; + playbackUrl: string; + title: string; + outPath: string +} + + +export type StreamVideo = { + guid: string; title: string; duration: string; publishDate: string; diff --git a/src/Utils.ts b/src/Utils.ts index 3d3036e..7b6a906 100644 --- a/src/Utils.ts +++ b/src/Utils.ts @@ -1,47 +1,63 @@ -import { ApiClient } from './ApiClient'; +import { StreamApiClient } from './ApiClient'; import { ERROR_CODE } from './Errors'; import { logger } from './Logger'; -import { Session } from './Types'; +import { StreamSession, VideoUrl } from './Types'; import { AxiosResponse } from 'axios'; import { execSync } from 'child_process'; import fs from 'fs'; -async function extractGuids(url: string, client: ApiClient): Promise | null> { +const streamUrlRegex = new RegExp(/https?:\/\/web\.microsoftstream\.com.*/); +const shareUrlRegex = new RegExp(/https?:\/\/.+\.sharepoint\.com.*/); + +/** we place the guid in the url field in the return */ +export async function extractStreamGuids(urlList: Array, session: StreamSession): Promise> { const videoRegex = new RegExp(/https:\/\/.*\/video\/(\w{8}-(?:\w{4}-){3}\w{12})/); const groupRegex = new 
RegExp(/https:\/\/.*\/group\/(\w{8}-(?:\w{4}-){3}\w{12})/); + // const sharepointDirect = new RegExp(/https:\/\/(?.+\.sharepoint\.com)\/(?:.*\/)?(?.*\.mp4)/); + // const sharepointEncoded = new RegExp(/https:\/\/(?.+\.sharepoint\.com)\/.*id=(?.*mp4)/); - const videoMatch: RegExpExecArray | null = videoRegex.exec(url); - const groupMatch: RegExpExecArray | null = groupRegex.exec(url); + const apiClient: StreamApiClient = StreamApiClient.getInstance(session); + const guidList: Array = []; - if (videoMatch) { - return [videoMatch[1]]; - } - else if (groupMatch) { - const videoNumber: number = await client.callApi(`groups/${groupMatch[1]}`, 'get') - .then((response: AxiosResponse | undefined) => response?.data.metrics.videos); - const result: Array = []; + for (const url of urlList) { + const videoMatch: RegExpExecArray | null = videoRegex.exec(url.url); + const groupMatch: RegExpExecArray | null = groupRegex.exec(url.url); - // Anything above $top=100 results in 400 Bad Request - // Use $skip to skip the first 100 and get another 100 and so on - for (let index = 0; index <= Math.floor(videoNumber / 100); index++) { - const partial: Array = await client.callApi( - `groups/${groupMatch[1]}/videos?$skip=${100 * index}&` + - '$top=100&$orderby=publishedDate asc', 'get') - .then( - (response: AxiosResponse | undefined) => - response?.data.value.map((item: any) => item.id) - ); - - result.push(...partial); + if (videoMatch) { + guidList.push({ + url: videoMatch[1], + outDir: url.outDir + }); } + else if (groupMatch) { + const videoNumber: number = await apiClient.callApi(`groups/${groupMatch[1]}`, 'get') + .then((response: AxiosResponse | undefined) => response?.data.metrics.videos); - return result; + // Anything above $top=100 results in 400 Bad Request + // Use $skip to skip the first 100 and get another 100 and so on + for (let index = 0; index <= Math.floor(videoNumber / 100); index++) { + await apiClient.callApi( + `groups/${groupMatch[1]}/videos?$skip=${100 * 
index}&` + + '$top=100&$orderby=publishedDate asc', 'get' + ).then((response: AxiosResponse | undefined) => { + response?.data.value.forEach((video: { id: string }) => + guidList.push({ + url: video.id, + outDir: url.outDir + }) + ); + }); + } + } + else { + logger.warn(`Invalid url '${url.url}', skipping...`); + } } - return null; + return guidList; } @@ -52,30 +68,32 @@ async function extractGuids(url: string, client: ApiClient): Promise} urlList list of link to parse * @param {string} defaultOutDir the directry used to save the videos - * @param {Session} session used to call the API to get the GUIDs from group links * - * @returns Array of 2 elements, 1st one being the GUIDs array, 2nd one the output directories array + * @returns Array of 2 elements: 1st an array of Microsoft Stream urls, 2nd an array of SharePoint urls */ -export async function parseCLIinput(urlList: Array, defaultOutDir: string, - session: Session): Promise>> { - - const apiClient: ApiClient = ApiClient.getInstance(session); - const guidList: Array = []; +export function parseCLIinput(urlList: Array, defaultOutDir: string): Array> { + const stream: Array = []; + const share: Array = []; for (const url of urlList) { - const guids: Array | null = await extractGuids(url, apiClient); - - if (guids) { - guidList.push(...guids); + if (streamUrlRegex.test(url)) { + stream.push({ + url: url, + outDir: defaultOutDir + }); + } + else if (shareUrlRegex.test(url)) { + share.push({ + url: url, + outDir: defaultOutDir + }); } else { logger.warn(`Invalid url '${url}', skipping..`); } } - const outDirList: Array = Array(guidList.length).fill(defaultOutDir); - - return [guidList, outDirList]; + return [stream, share]; } @@ -86,94 +104,84 @@ export async function parseCLIinput(urlList: Array, defaultOutDir: strin * * @param {string} inputFile path to the text file * @param {string} defaultOutDir the default/fallback directory used to save the videos - * @param {Session} session used to call the API to get the 
GUIDs from group links * * @returns Array of 2 elements, 1st one being the GUIDs array, 2nd one the output directories array */ -export async function parseInputFile(inputFile: string, defaultOutDir: string, - session: Session): Promise>> { +export function parseInputFile(inputFile: string, defaultOutDir: string): Array> { // rawContent is a list of each line of the file - const rawContent: Array = fs.readFileSync(inputFile).toString() - .split(/\r?\n/); - const apiClient: ApiClient = ApiClient.getInstance(session); - - const guidList: Array = []; - const outDirList: Array = []; - // if the last line was an url set this - let foundUrl = false; + const rawContent: Array = fs.readFileSync(inputFile).toString().split(/\r?\n/); + const stream: Array = []; + const share: Array = []; + let streamUrl = false; for (let i = 0; i < rawContent.length; i++) { const line: string = rawContent[i]; + const nextLine: string | null = i < rawContent.length ? rawContent[i + 1] : null; + let outDir = defaultOutDir; // filter out lines with no content if (!line.match(/\S/)) { logger.warn(`Line ${i + 1} is empty, skipping..`); continue; } - // parse if line is option - else if (line.includes('-dir')) { - if (foundUrl) { - const outDir: string | null = parseOption('-dir', line); + // check for urls + else if (streamUrlRegex.test(line)) { + streamUrl = true; + } + else if (shareUrlRegex.test(line)) { + streamUrl = false; + } + // now invalid line since we skip ahead one line if we find dir option + else { + logger.warn(`Line ${i + 1}: '${line}' is invalid, skipping..`); - if (outDir && checkOutDir(outDir)) { - outDirList.push(...Array(guidList.length - outDirList.length) - .fill(outDir)); - } - else { - outDirList.push(...Array(guidList.length - outDirList.length) - .fill(defaultOutDir)); - } + continue; + } - foundUrl = false; - continue; - } - else { - logger.warn(`Found options without preceding url at line ${i + 1}, skipping..`); - continue; + // we now have a valid url, check next 
line for option + if (nextLine) { + const optionDir = parseOption('-dir', nextLine); + + if (optionDir && makeOutDir(optionDir)) { + outDir = optionDir; + // if there was an option we skip a line + i++; } } - /* now line is not empty nor an option line. - If foundUrl is still true last line didn't have a directory option - so we stil need to add the default outDir to outDirList to */ - if (foundUrl) { - outDirList.push(...Array(guidList.length - outDirList.length) - .fill(defaultOutDir)); - foundUrl = false; - } - - const guids: Array | null = await extractGuids(line, apiClient); - - if (guids) { - guidList.push(...guids); - foundUrl = true; + if (streamUrl) { + stream.push({ + url: line, + outDir + }); } else { - logger.warn(`Invalid url at line ${i + 1}, skipping..`); + share.push({ + url: line, + outDir + }); } } - // if foundUrl is still true after the loop we have some url without an outDir - if (foundUrl) { - outDirList.push(...Array(guidList.length - outDirList.length) - .fill(defaultOutDir)); - } - return [guidList, outDirList]; + return [stream, share]; } // This leaves us the option to add more options (badum tss) _Luca function parseOption(optionSyntax: string, item: string): string | null { const match: RegExpMatchArray | null = item.match( - RegExp(`^\\s*${optionSyntax}\\s?=\\s?['"](.*)['"]`) + RegExp(`^\\s+${optionSyntax}\\s*=\\s*['"](.*)['"]`) ); return match ? 
match[1] : null; } - -export function checkOutDir(directory: string): boolean { +/** + * @param directory path to create + * @returns true on success, false otherwise + */ +export function makeOutDir(directory: string): boolean { if (!fs.existsSync(directory)) { try { fs.mkdirSync(directory); diff --git a/src/VideoUtils.ts b/src/VideoUtils.ts index 0e2ff3e..98417c9 100644 --- a/src/VideoUtils.ts +++ b/src/VideoUtils.ts @@ -1,13 +1,14 @@ -import { ApiClient } from './ApiClient'; +import { StreamApiClient } from './ApiClient'; import { promptUser } from './CommandLineParser'; import { logger } from './Logger'; -import { Video, Session } from './Types'; +import { StreamVideo, StreamSession, VideoUrl } from './Types'; import { AxiosResponse } from 'axios'; import fs from 'fs'; import { parse as parseDuration, Duration } from 'iso8601-duration'; import path from 'path'; import sanitizeWindowsName from 'sanitize-filename'; +import { extractStreamGuids } from './Utils'; function publishedDateToString(date: string): string { const dateJs: Date = new Date(date); @@ -45,8 +46,8 @@ function durationToTotalChunks(duration: string): number { } -export async function getVideoInfo(videoGuids: Array, session: Session, subtitles?: boolean): Promise> { - const metadata: Array