1
0
mirror of https://github.com/snobu/destreamer.git synced 2026-01-17 05:22:18 +00:00

refactor toward SharePoint downloader

This commit is contained in:
Luca Armaroli
2021-10-13 22:01:54 +02:00
parent b2497865a1
commit 6a2159b266
12 changed files with 482 additions and 445 deletions

4
.gitignore vendored
View File

@@ -3,10 +3,12 @@
*.log
*.js
*.zip
*.xml
yarn.lock
.chrome_data
node_modules
videos
release
build
yarn.lock

View File

@@ -1,16 +1,16 @@
import { logger } from './Logger';
import { Session } from './Types';
import { StreamSession } from './Types';
import axios, { AxiosRequestConfig, AxiosResponse, AxiosInstance, AxiosError } from 'axios';
import axiosRetry, { isNetworkOrIdempotentRequestError } from 'axios-retry';
export class ApiClient {
private static instance: ApiClient;
export class StreamApiClient {
private static instance: StreamApiClient;
private axiosInstance?: AxiosInstance;
private session?: Session;
private session?: StreamSession;
private constructor(session?: Session) {
private constructor(session?: StreamSession) {
this.session = session;
this.axiosInstance = axios.create({
baseURL: session?.ApiGatewayUri,
@@ -50,16 +50,16 @@ export class ApiClient {
*
* @param session used if initializing
*/
public static getInstance(session?: Session): ApiClient {
if (!ApiClient.instance) {
ApiClient.instance = new ApiClient(session);
public static getInstance(session?: StreamSession): StreamApiClient {
if (!StreamApiClient.instance) {
StreamApiClient.instance = new StreamApiClient(session);
}
return ApiClient.instance;
return StreamApiClient.instance;
}
public setSession(session: Session): void {
if (!ApiClient.instance) {
public setSession(session: StreamSession): void {
if (!StreamApiClient.instance) {
logger.warn("Trying to update ApiCient session when it's not initialized!");
}

View File

@@ -1,5 +1,5 @@
import { CLI_ERROR, ERROR_CODE } from './Errors';
import { checkOutDir } from './Utils';
import { makeOutDir } from './Utils';
import { logger } from './Logger';
import { templateElements } from './Types';
@@ -9,7 +9,7 @@ import sanitize from 'sanitize-filename';
import yargs from 'yargs';
export const argv: any = yargs.options({
export const argv = yargs.options({
username: {
alias: 'u',
type: 'string',
@@ -114,7 +114,7 @@ export const argv: any = yargs.options({
.check(() => noArguments())
.check((argv: any) => checkInputConflicts(argv.videoUrls, argv.inputFile))
.check((argv: any) => {
if (checkOutDir(argv.outputDirectory)) {
if (makeOutDir(argv.outputDirectory)) {
return true;
}
else {

172
src/Downloaders.ts Normal file
View File

@@ -0,0 +1,172 @@
import { StreamApiClient } from './ApiClient';
import { argv } from './CommandLineParser';
import { ERROR_CODE } from './Errors';
import { logger } from './Logger';
import { doStreamLogin } from './LoginModules';
import { drawThumbnail } from './Thumbnail';
import { refreshSession, TokenCache } from './TokenCache';
import { StreamVideo, VideoUrl } from './Types';
import { ffmpegTimemarkToChunk } from './Utils';
import { createUniquePath, getStreamInfo } from './VideoUtils';
import cliProgress from 'cli-progress';
import fs from 'fs';
const { FFmpegCommand, FFmpegInput, FFmpegOutput } = require('@tedconf/fessonia')();
const tokenCache: TokenCache = new TokenCache();
export async function downloadStreamVideo(videoUrls: Array<VideoUrl>): Promise<void> {
let session = tokenCache.Read() ?? await doStreamLogin('https://web.microsoftstream.com/', tokenCache, argv.username);
logger.verbose('Session and API info \n' +
'\t API Gateway URL: '.cyan + session.ApiGatewayUri + '\n' +
'\t API Gateway version: '.cyan + session.ApiGatewayVersion + '\n');
logger.info('Fetching videos info... \n');
const videos: Array<StreamVideo> = createUniquePath(
await getStreamInfo(videoUrls, session, argv.closedCaptions),
argv.outputTemplate, argv.format, argv.skip
);
if (argv.simulate) {
videos.forEach((video: StreamVideo) => {
logger.info(
'\nTitle: '.green + video.title +
'\nOutPath: '.green + video.outPath +
'\nPublished Date: '.green + video.publishDate +
'\nPlayback URL: '.green + video.playbackUrl +
((video.captionsUrl) ? ('\nCC URL: '.green + video.captionsUrl) : '')
);
});
return;
}
for (const [index, video] of videos.entries()) {
if (argv.skip && fs.existsSync(video.outPath)) {
logger.info(`File already exists, skipping: ${video.outPath} \n`);
continue;
}
if (argv.keepLoginCookies && index !== 0) {
logger.info('Trying to refresh token...');
session = await refreshSession('https://web.microsoftstream.com/video/' + video.guid);
StreamApiClient.getInstance().setSession(session);
}
const pbar: cliProgress.SingleBar = new cliProgress.SingleBar({
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
format: 'progress [{bar}] {percentage}% {speed} {eta_formatted}',
// process.stdout.columns may return undefined in some terminals (Cygwin/MSYS)
barsize: Math.floor((process.stdout.columns || 30) / 3),
stopOnComplete: true,
hideCursor: true,
});
logger.info(`\nDownloading Video: ${video.title} \n`);
logger.verbose('Extra video info \n' +
'\t Video m3u8 playlist URL: '.cyan + video.playbackUrl + '\n' +
'\t Video tumbnail URL: '.cyan + video.posterImageUrl + '\n' +
'\t Video subtitle URL (may not exist): '.cyan + video.captionsUrl + '\n' +
'\t Video total chunks: '.cyan + video.totalChunks + '\n');
logger.info('Spawning ffmpeg with access token and HLS URL. This may take a few seconds...\n\n');
if (!process.stdout.columns) {
logger.warn(
'Unable to get number of columns from terminal.\n' +
'This happens sometimes in Cygwin/MSYS.\n' +
'No progress bar can be rendered, however the download process should not be affected.\n\n' +
'Please use PowerShell or cmd.exe to run destreamer on Windows.'
);
}
const headers: string = 'Authorization: Bearer ' + session.AccessToken;
if (!argv.noExperiments) {
if (video.posterImageUrl) {
await drawThumbnail(video.posterImageUrl, session);
}
}
const ffmpegInpt: any = new FFmpegInput(video.playbackUrl, new Map([
['headers', headers]
]));
const ffmpegOutput: any = new FFmpegOutput(video.outPath, new Map([
argv.acodec === 'none' ? ['an', null] : ['c:a', argv.acodec],
argv.vcodec === 'none' ? ['vn', null] : ['c:v', argv.vcodec],
['n', null]
]));
const ffmpegCmd: any = new FFmpegCommand();
const cleanupFn: () => void = () => {
pbar.stop();
if (argv.noCleanup) {
return;
}
try {
fs.unlinkSync(video.outPath);
}
catch (e) {
// Future handling of an error (maybe)
}
};
pbar.start(video.totalChunks, 0, {
speed: '0'
});
// prepare ffmpeg command line
ffmpegCmd.addInput(ffmpegInpt);
ffmpegCmd.addOutput(ffmpegOutput);
if (argv.closedCaptions && video.captionsUrl) {
const captionsInpt: any = new FFmpegInput(video.captionsUrl, new Map([
['headers', headers]
]));
ffmpegCmd.addInput(captionsInpt);
}
ffmpegCmd.on('update', async (data: any) => {
const currentChunks: number = ffmpegTimemarkToChunk(data.out_time);
pbar.update(currentChunks, {
speed: data.bitrate
});
// Graceful fallback in case we can't get columns (Cygwin/MSYS)
if (!process.stdout.columns) {
process.stdout.write(`--- Speed: ${data.bitrate}, Cursor: ${data.out_time}\r`);
}
});
process.on('SIGINT', cleanupFn);
// let the magic begin...
await new Promise((resolve: any) => {
ffmpegCmd.on('error', (error: any) => {
cleanupFn();
logger.error(`FFmpeg returned an error: ${error.message}`);
process.exit(ERROR_CODE.UNK_FFMPEG_ERROR);
});
ffmpegCmd.on('success', () => {
pbar.update(video.totalChunks); // set progress bar to 100%
logger.info(`\nDownload finished: ${video.outPath} \n`);
resolve();
});
ffmpegCmd.spawn();
});
process.removeListener('SIGINT', cleanupFn);
}
}

84
src/LoginModules.ts Normal file
View File

@@ -0,0 +1,84 @@
import { logger } from './Logger';
import puppeteer from 'puppeteer';
import { getPuppeteerChromiumPath } from './PuppeteerHelper';
import { chromeCacheFolder } from './destreamer';
import { argv } from './CommandLineParser';
import { StreamSession } from './Types';
import { ERROR_CODE } from './Errors';
import { TokenCache } from './TokenCache';
/**
 * Drive an interactive Azure AD (OpenID Connect) login for Microsoft Stream
 * through a Chromium window and capture the resulting API session.
 *
 * @param url        Stream page to start the login flow from
 * @param tokenCache cache the fresh session is written to before returning
 * @param username   optional account email; when given it is pre-typed into
 *                   the login form and submitted
 * @returns the captured StreamSession (access token + API gateway info)
 *
 * NOTE(review): the log message says "headless Chrome" but `headless: false`
 * is passed below — the window is intentionally visible so the user can
 * complete password/MFA steps.
 */
export async function doStreamLogin(url: string, tokenCache: TokenCache, username?: string): Promise<StreamSession> {
    logger.info('Launching headless Chrome to perform the OpenID Connect dance...');

    const browser: puppeteer.Browser = await puppeteer.launch({
        executablePath: getPuppeteerChromiumPath(),
        headless: false,  // visible window: the user may need to interact with the login
        // reuse the Chrome profile between runs so saved cookies can shorten the login
        userDataDir: (argv.keepLoginCookies) ? chromeCacheFolder : undefined,
        args: [
            '--disable-dev-shm-usage',
            '--fast-start',
            '--no-sandbox'
        ]
    });
    const page: puppeteer.Page = (await browser.pages())[0];

    logger.info('Navigating to login page...');
    await page.goto(url, { waitUntil: 'load' });

    try {
        if (username) {
            // Pre-fill the email field and submit; 3s timeout in case the
            // form never appears (e.g. already logged in via cached cookies).
            await page.waitForSelector('input[type="email"]', { timeout: 3000 });
            await page.keyboard.type(username);
            await page.click('input[type="submit"]');
        }
        else {
            /* If a username was not provided we let the user take actions that
            lead up to the video page. */
        }
    }
    catch (e) {
        /* If there is no email input selector we aren't in the login module,
        we are probably using the cache to aid the login.
        It could finish the login on its own if the user said 'yes' when asked to
        remember the credentials or it could still prompt the user for a password */
    }

    // Block (up to 2.5 min) until the browser lands back on microsoftstream.com,
    // which marks the end of the login dance.
    await browser.waitForTarget((target: puppeteer.Target) => target.url().endsWith('microsoftstream.com/'), { timeout: 150000 });
    logger.info('We are logged in.');

    let session: StreamSession | null = null;
    let tries = 1;

    // Poll the page for its session info: the page script may not have
    // populated it yet right after the redirect, so retry a few times.
    while (!session) {
        try {
            // Deliberate trick: `sessionInfo` is only *declared* here to satisfy
            // the TypeScript compiler. The callback below is serialized and run
            // inside the browser page, where `sessionInfo` is a real global set
            // by Microsoft Stream — the local declaration is never assigned.
            let sessionInfo: any;
            session = await page.evaluate(
                () => {
                    return {
                        AccessToken: sessionInfo.AccessToken,
                        ApiGatewayUri: sessionInfo.ApiGatewayUri,
                        ApiGatewayVersion: sessionInfo.ApiGatewayVersion
                    };
                }
            );
        }
        catch (error) {
            // Give up after 5 failed attempts (~15s of polling).
            if (tries > 5) {
                process.exit(ERROR_CODE.NO_SESSION_INFO);
            }

            session = null;
            tries++;
            await page.waitForTimeout(3000);  // wait 3s before the next attempt
        }
    }

    tokenCache.Write(session);
    logger.info('Wrote access token to token cache.');
    logger.info("At this point Chromium's job is done, shutting it down...\n");

    await browser.close();

    return session;
}

View File

@@ -1,12 +1,12 @@
import { ApiClient } from './ApiClient';
import { Session } from './Types';
import { StreamApiClient } from './ApiClient';
import { StreamSession } from './Types';
import terminalImage from 'terminal-image';
import { AxiosResponse } from 'axios';
export async function drawThumbnail(posterImage: string, session: Session): Promise<void> {
const apiClient: ApiClient = ApiClient.getInstance(session);
export async function drawThumbnail(posterImage: string, session: StreamSession): Promise<void> {
const apiClient: StreamApiClient = StreamApiClient.getInstance(session);
const thumbnail: Buffer = await apiClient.callUrl(posterImage, 'get', null, 'arraybuffer')
.then((response: AxiosResponse<any> | undefined) => response?.data);

View File

@@ -2,7 +2,7 @@ import { chromeCacheFolder } from './destreamer';
import { ERROR_CODE } from './Errors';
import { logger } from './Logger';
import { getPuppeteerChromiumPath } from './PuppeteerHelper';
import { Session } from './Types';
import { StreamSession } from './Types';
import fs from 'fs';
import jwtDecode from 'jwt-decode';
@@ -12,14 +12,14 @@ import puppeteer from 'puppeteer';
export class TokenCache {
private tokenCacheFile = '.token_cache';
public Read(): Session | null {
public Read(): StreamSession | null {
if (!fs.existsSync(this.tokenCacheFile)) {
logger.warn(`${this.tokenCacheFile} not found. \n`);
return null;
}
const session: Session = JSON.parse(fs.readFileSync(this.tokenCacheFile, 'utf8'));
const session: StreamSession = JSON.parse(fs.readFileSync(this.tokenCacheFile, 'utf8'));
type Jwt = {
[key: string]: any
@@ -41,7 +41,7 @@ export class TokenCache {
return session;
}
public Write(session: Session): void {
public Write(session: StreamSession): void {
const s: string = JSON.stringify(session, null, 4);
fs.writeFile(this.tokenCacheFile, s, (err: any) => {
if (err) {
@@ -54,7 +54,7 @@ export class TokenCache {
}
export async function refreshSession(url: string): Promise<Session> {
export async function refreshSession(url: string): Promise<StreamSession> {
const videoId: string = url.split('/').pop() ?? process.exit(ERROR_CODE.INVALID_VIDEO_GUID);
const browser: puppeteer.Browser = await puppeteer.launch({
@@ -73,7 +73,7 @@ export async function refreshSession(url: string): Promise<Session> {
await browser.waitForTarget((target: puppeteer.Target) => target.url().includes(videoId), { timeout: 30000 });
let session: Session | null = null;
let session: StreamSession | null = null;
let tries = 1;
while (!session) {

View File

@@ -1,11 +1,27 @@
export type Session = {
export type StreamSession = {
AccessToken: string;
ApiGatewayUri: string;
ApiGatewayVersion: string;
}
export type Video = {
export type VideoUrl = {
url: string,
outDir: string
}
export type SharepointVideo = {
// if we can download the MP4 or we need to use DASH
direct: boolean;
playbackUrl: string;
title: string;
outPath: string
}
export type StreamVideo = {
guid: string;
title: string;
duration: string;
publishDate: string;

View File

@@ -1,47 +1,63 @@
import { ApiClient } from './ApiClient';
import { StreamApiClient } from './ApiClient';
import { ERROR_CODE } from './Errors';
import { logger } from './Logger';
import { Session } from './Types';
import { StreamSession, VideoUrl } from './Types';
import { AxiosResponse } from 'axios';
import { execSync } from 'child_process';
import fs from 'fs';
async function extractGuids(url: string, client: ApiClient): Promise<Array<string> | null> {
const streamUrlRegex = new RegExp(/https?:\/\/web\.microsoftstream\.com.*/);
const shareUrlRegex = new RegExp(/https?:\/\/.+\.sharepoint\.com.*/);
/** we place the guid in the url fild in the return */
export async function extractStreamGuids(urlList: Array<VideoUrl>, session: StreamSession): Promise<Array<VideoUrl>> {
const videoRegex = new RegExp(/https:\/\/.*\/video\/(\w{8}-(?:\w{4}-){3}\w{12})/);
const groupRegex = new RegExp(/https:\/\/.*\/group\/(\w{8}-(?:\w{4}-){3}\w{12})/);
// const sharepointDirect = new RegExp(/https:\/\/(?<hostname>.+\.sharepoint\.com)\/(?:.*\/)?(?<filename>.*\.mp4)/);
// const sharepointEncoded = new RegExp(/https:\/\/(?<hostname>.+\.sharepoint\.com)\/.*id=(?<encodedFilename>.*mp4)/);
const videoMatch: RegExpExecArray | null = videoRegex.exec(url);
const groupMatch: RegExpExecArray | null = groupRegex.exec(url);
const apiClient: StreamApiClient = StreamApiClient.getInstance(session);
const guidList: Array<VideoUrl> = [];
if (videoMatch) {
return [videoMatch[1]];
}
else if (groupMatch) {
const videoNumber: number = await client.callApi(`groups/${groupMatch[1]}`, 'get')
.then((response: AxiosResponse<any> | undefined) => response?.data.metrics.videos);
const result: Array<string> = [];
for (const url of urlList) {
const videoMatch: RegExpExecArray | null = videoRegex.exec(url.url);
const groupMatch: RegExpExecArray | null = groupRegex.exec(url.url);
// Anything above $top=100 results in 400 Bad Request
// Use $skip to skip the first 100 and get another 100 and so on
for (let index = 0; index <= Math.floor(videoNumber / 100); index++) {
const partial: Array<string> = await client.callApi(
`groups/${groupMatch[1]}/videos?$skip=${100 * index}&` +
'$top=100&$orderby=publishedDate asc', 'get')
.then(
(response: AxiosResponse<any> | undefined) =>
response?.data.value.map((item: any) => item.id)
);
result.push(...partial);
if (videoMatch) {
guidList.push({
url: videoMatch[1],
outDir: url.outDir
});
}
else if (groupMatch) {
const videoNumber: number = await apiClient.callApi(`groups/${groupMatch[1]}`, 'get')
.then((response: AxiosResponse<any> | undefined) => response?.data.metrics.videos);
return result;
// Anything above $top=100 results in 400 Bad Request
// Use $skip to skip the first 100 and get another 100 and so on
for (let index = 0; index <= Math.floor(videoNumber / 100); index++) {
await apiClient.callApi(
`groups/${groupMatch[1]}/videos?$skip=${100 * index}&` +
'$top=100&$orderby=publishedDate asc', 'get'
).then((response: AxiosResponse<any> | undefined) => {
response?.data.value.forEach((video: { id: string }) =>
guidList.push({
url: video.id,
outDir: url.outDir
})
);
});
}
}
else {
logger.warn(`Invalid url '${url.url}', skipping...`);
}
}
return null;
return guidList;
}
@@ -52,30 +68,32 @@ async function extractGuids(url: string, client: ApiClient): Promise<Array<strin
*
* @param {Array<string>} urlList list of link to parse
* @param {string} defaultOutDir the directry used to save the videos
* @param {Session} session used to call the API to get the GUIDs from group links
*
* @returns Array of 2 elements, 1st one being the GUIDs array, 2nd one the output directories array
* @returns Array of 2 elements: 1st an array of Microsoft Stream urls, 2nd an array of SharePoint urls
*/
export async function parseCLIinput(urlList: Array<string>, defaultOutDir: string,
session: Session): Promise<Array<Array<string>>> {
const apiClient: ApiClient = ApiClient.getInstance(session);
const guidList: Array<string> = [];
export function parseCLIinput(urlList: Array<string>, defaultOutDir: string): Array<Array<VideoUrl>> {
const stream: Array<VideoUrl> = [];
const share: Array<VideoUrl> = [];
for (const url of urlList) {
const guids: Array<string> | null = await extractGuids(url, apiClient);
if (guids) {
guidList.push(...guids);
if (streamUrlRegex.test(url)) {
stream.push({
url: url,
outDir: defaultOutDir
});
}
else if (shareUrlRegex.test(url)) {
share.push({
url: url,
outDir: defaultOutDir
});
}
else {
logger.warn(`Invalid url '${url}', skipping..`);
}
}
const outDirList: Array<string> = Array(guidList.length).fill(defaultOutDir);
return [guidList, outDirList];
return [stream, share];
}
@@ -86,94 +104,84 @@ export async function parseCLIinput(urlList: Array<string>, defaultOutDir: strin
*
* @param {string} inputFile path to the text file
* @param {string} defaultOutDir the default/fallback directory used to save the videos
* @param {Session} session used to call the API to get the GUIDs from group links
*
* @returns Array of 2 elements, 1st one being the GUIDs array, 2nd one the output directories array
*/
export async function parseInputFile(inputFile: string, defaultOutDir: string,
session: Session): Promise<Array<Array<string>>> {
export function parseInputFile(inputFile: string, defaultOutDir: string): Array<Array<VideoUrl>> {
// rawContent is a list of each line of the file
const rawContent: Array<string> = fs.readFileSync(inputFile).toString()
.split(/\r?\n/);
const apiClient: ApiClient = ApiClient.getInstance(session);
const guidList: Array<string> = [];
const outDirList: Array<string> = [];
// if the last line was an url set this
let foundUrl = false;
const rawContent: Array<string> = fs.readFileSync(inputFile).toString().split(/\r?\n/);
const stream: Array<VideoUrl> = [];
const share: Array<VideoUrl> = [];
let streamUrl = false;
for (let i = 0; i < rawContent.length; i++) {
const line: string = rawContent[i];
const nextLine: string | null = i < rawContent.length ? rawContent[i + 1] : null;
let outDir = defaultOutDir;
// filter out lines with no content
if (!line.match(/\S/)) {
logger.warn(`Line ${i + 1} is empty, skipping..`);
continue;
}
// parse if line is option
else if (line.includes('-dir')) {
if (foundUrl) {
const outDir: string | null = parseOption('-dir', line);
// check for urls
else if (streamUrlRegex.test(line)) {
streamUrl = true;
}
else if (shareUrlRegex.test(line)) {
streamUrl = false;
}
// now invalid line since we skip ahead one line if we find dir option
else {
logger.warn(`Line ${i + 1}: '${line}' is invalid, skipping..`);
if (outDir && checkOutDir(outDir)) {
outDirList.push(...Array(guidList.length - outDirList.length)
.fill(outDir));
}
else {
outDirList.push(...Array(guidList.length - outDirList.length)
.fill(defaultOutDir));
}
continue;
}
foundUrl = false;
continue;
}
else {
logger.warn(`Found options without preceding url at line ${i + 1}, skipping..`);
continue;
// we now have a valid url, check next line for option
if (nextLine) {
const optionDir = parseOption('-dir', nextLine);
if (optionDir && makeOutDir(optionDir)) {
outDir = optionDir;
// if there was an option we skip a line
i++;
}
}
/* now line is not empty nor an option line.
If foundUrl is still true last line didn't have a directory option
so we stil need to add the default outDir to outDirList to */
if (foundUrl) {
outDirList.push(...Array(guidList.length - outDirList.length)
.fill(defaultOutDir));
foundUrl = false;
}
const guids: Array<string> | null = await extractGuids(line, apiClient);
if (guids) {
guidList.push(...guids);
foundUrl = true;
if (streamUrl) {
stream.push({
url: line,
outDir
});
}
else {
logger.warn(`Invalid url at line ${i + 1}, skipping..`);
share.push({
url: line,
outDir
});
}
}
// if foundUrl is still true after the loop we have some url without an outDir
if (foundUrl) {
outDirList.push(...Array(guidList.length - outDirList.length)
.fill(defaultOutDir));
}
return [guidList, outDirList];
return [stream, share];
}
// This leaves us the option to add more options (badum tss) _Luca
function parseOption(optionSyntax: string, item: string): string | null {
const match: RegExpMatchArray | null = item.match(
RegExp(`^\\s*${optionSyntax}\\s?=\\s?['"](.*)['"]`)
RegExp(`^\\s+${optionSyntax}\\s*=\\s*['"](.*)['"]`)
);
return match ? match[1] : null;
}
export function checkOutDir(directory: string): boolean {
/**
* @param directory path to create
* @returns true on success, false otherwise
*/
export function makeOutDir(directory: string): boolean {
if (!fs.existsSync(directory)) {
try {
fs.mkdirSync(directory);

View File

@@ -1,13 +1,14 @@
import { ApiClient } from './ApiClient';
import { StreamApiClient } from './ApiClient';
import { promptUser } from './CommandLineParser';
import { logger } from './Logger';
import { Video, Session } from './Types';
import { StreamVideo, StreamSession, VideoUrl } from './Types';
import { AxiosResponse } from 'axios';
import fs from 'fs';
import { parse as parseDuration, Duration } from 'iso8601-duration';
import path from 'path';
import sanitizeWindowsName from 'sanitize-filename';
import { extractStreamGuids } from './Utils';
function publishedDateToString(date: string): string {
const dateJs: Date = new Date(date);
@@ -45,8 +46,8 @@ function durationToTotalChunks(duration: string): number {
}
export async function getVideoInfo(videoGuids: Array<string>, session: Session, subtitles?: boolean): Promise<Array<Video>> {
const metadata: Array<Video> = [];
export async function getStreamInfo(videoUrls: Array<VideoUrl>, session: StreamSession, subtitles?: boolean): Promise<Array<StreamVideo>> {
const metadata: Array<StreamVideo> = [];
let title: string;
let duration: string;
let publishDate: string;
@@ -54,19 +55,23 @@ export async function getVideoInfo(videoGuids: Array<string>, session: Session,
let author: string;
let authorEmail: string;
let uniqueId: string;
const outPath = '';
let totalChunks: number;
let playbackUrl: string;
let posterImageUrl: string;
let captionsUrl: string | undefined;
const apiClient: ApiClient = ApiClient.getInstance(session);
const apiClient: StreamApiClient = StreamApiClient.getInstance(session);
// we place the guid in the url field
const videoGUIDs = await extractStreamGuids(videoUrls, session);
/* TODO: change this to a single guid at a time to ease our footprint on the
MSS servers or we get throttled after 10 sequential reqs */
for (const guid of videoGuids) {
for (const guid of videoGUIDs) {
const response: AxiosResponse<any> | undefined =
await apiClient.callApi('videos/' + guid + '?$expand=creator', 'get');
await apiClient.callApi('videos/' + guid.url + '?$expand=creator', 'get');
title = sanitizeWindowsName(response?.data['name']);
@@ -80,7 +85,7 @@ export async function getVideoInfo(videoGuids: Array<string>, session: Session,
authorEmail = response?.data['creator'].mail;
uniqueId = '#' + guid.split('-')[0];
uniqueId = '#' + guid.url.split('-')[0];
totalChunks = durationToTotalChunks(response?.data.media['duration']);
@@ -112,18 +117,19 @@ export async function getVideoInfo(videoGuids: Array<string>, session: Session,
}
metadata.push({
title: title,
duration: duration,
publishDate: publishDate,
publishTime: publishTime,
author: author,
authorEmail: authorEmail,
uniqueId: uniqueId,
outPath: outPath,
totalChunks: totalChunks, // Abstraction of FFmpeg timemark
playbackUrl: playbackUrl,
posterImageUrl: posterImageUrl,
captionsUrl: captionsUrl
guid: guid.url,
title,
duration,
publishDate,
publishTime,
author,
authorEmail,
uniqueId,
outPath: guid.outDir,
totalChunks, // Abstraction of FFmpeg timemark
playbackUrl,
posterImageUrl,
captionsUrl
});
}
@@ -131,16 +137,16 @@ export async function getVideoInfo(videoGuids: Array<string>, session: Session,
}
export function createUniquePath(videos: Array<Video>, outDirs: Array<string>, template: string, format: string, skip?: boolean): Array<Video> {
export function createUniquePath(videos: Array<StreamVideo>, template: string, format: string, skip?: boolean): Array<StreamVideo> {
videos.forEach((video: Video, index: number) => {
videos.forEach((video: StreamVideo) => {
let title: string = template;
let finalTitle: string;
const elementRegEx = RegExp(/{(.*?)}/g);
let match = elementRegEx.exec(template);
while (match) {
const value = video[match[1] as keyof Video] as string;
const value = video[match[1] as keyof StreamVideo] as string;
title = title.replace(match[0], value);
match = elementRegEx.exec(template);
}
@@ -148,7 +154,7 @@ export function createUniquePath(videos: Array<Video>, outDirs: Array<string>, t
let i = 0;
finalTitle = title;
while (!skip && fs.existsSync(path.join(outDirs[index], finalTitle + '.' + format))) {
while (!skip && fs.existsSync(path.join(video.outPath, finalTitle + '.' + format))) {
finalTitle = `${title}.${++i}`;
}
@@ -158,7 +164,7 @@ export function createUniquePath(videos: Array<Video>, outDirs: Array<string>, t
logger.warn(`Not a valid Windows file name: "${finalFileName}".\nReplacing invalid characters with underscores to preserve cross-platform consistency.`);
}
video.outPath = path.join(outDirs[index], finalFileName);
video.outPath = path.join(video.outPath, finalFileName);
});

View File

@@ -2,22 +2,13 @@ import { argv } from './CommandLineParser';
import { ERROR_CODE } from './Errors';
import { setProcessEvents } from './Events';
import { logger } from './Logger';
import { getPuppeteerChromiumPath } from './PuppeteerHelper';
import { drawThumbnail } from './Thumbnail';
import { TokenCache, refreshSession } from './TokenCache';
import { Video, Session } from './Types';
import { checkRequirements, ffmpegTimemarkToChunk, parseInputFile, parseCLIinput} from './Utils';
import { getVideoInfo, createUniquePath } from './VideoUtils';
import { VideoUrl } from './Types';
import { checkRequirements, parseInputFile, parseCLIinput } from './Utils';
import cliProgress from 'cli-progress';
import fs from 'fs';
import isElevated from 'is-elevated';
import puppeteer from 'puppeteer';
import { ApiClient } from './ApiClient';
import { downloadStreamVideo } from './Downloaders';
const { FFmpegCommand, FFmpegInput, FFmpegOutput } = require('@tedconf/fessonia')();
const tokenCache: TokenCache = new TokenCache();
export const chromeCacheFolder = '.chrome_data';
@@ -44,259 +35,26 @@ async function init(): Promise<void> {
}
async function DoInteractiveLogin(url: string, username?: string): Promise<Session> {
logger.info('Launching headless Chrome to perform the OpenID Connect dance...');
const browser: puppeteer.Browser = await puppeteer.launch({
executablePath: getPuppeteerChromiumPath(),
headless: false,
userDataDir: (argv.keepLoginCookies) ? chromeCacheFolder : undefined,
args: [
'--disable-dev-shm-usage',
'--fast-start',
'--no-sandbox'
]
});
const page: puppeteer.Page = (await browser.pages())[0];
logger.info('Navigating to login page...');
await page.goto(url, { waitUntil: 'load' });
try {
if (username) {
await page.waitForSelector('input[type="email"]', {timeout: 3000});
await page.keyboard.type(username);
await page.click('input[type="submit"]');
}
else {
/* If a username was not provided we let the user take actions that
lead up to the video page. */
}
}
catch (e) {
/* If there is no email input selector we aren't in the login module,
we are probably using the cache to aid the login.
It could finish the login on its own if the user said 'yes' when asked to
remember the credentials or it could still prompt the user for a password */
}
await browser.waitForTarget((target: puppeteer.Target) => target.url().endsWith('microsoftstream.com/'), { timeout: 150000 });
logger.info('We are logged in.');
let session: Session | null = null;
let tries = 1;
while (!session) {
try {
let sessionInfo: any;
session = await page.evaluate(
() => {
return {
AccessToken: sessionInfo.AccessToken,
ApiGatewayUri: sessionInfo.ApiGatewayUri,
ApiGatewayVersion: sessionInfo.ApiGatewayVersion
};
}
);
}
catch (error) {
if (tries > 5) {
process.exit(ERROR_CODE.NO_SESSION_INFO);
}
session = null;
tries++;
await page.waitFor(3000);
}
}
tokenCache.Write(session);
logger.info('Wrote access token to token cache.');
logger.info("At this point Chromium's job is done, shutting it down...\n");
await browser.close();
return session;
}
async function downloadVideo(videoGUIDs: Array<string>, outputDirectories: Array<string>, session: Session): Promise<void> {
logger.info('Fetching videos info... \n');
const videos: Array<Video> = createUniquePath (
await getVideoInfo(videoGUIDs, session, argv.closedCaptions),
outputDirectories, argv.outputTemplate, argv.format, argv.skip
);
if (argv.simulate) {
videos.forEach((video: Video) => {
logger.info(
'\nTitle: '.green + video.title +
'\nOutPath: '.green + video.outPath +
'\nPublished Date: '.green + video.publishDate +
'\nPlayback URL: '.green + video.playbackUrl +
((video.captionsUrl) ? ('\nCC URL: '.green + video.captionsUrl) : '')
);
});
return;
}
for (const [index, video] of videos.entries()) {
// NOTE(review): this is the tail of the legacy Stream download loop — the
// enclosing function header and `for` statement sit above this excerpt, so
// `video`, `index`, `videoGUIDs`, `session` and `argv` come from that scope.
if (argv.skip && fs.existsSync(video.outPath)) {
logger.info(`File already exists, skipping: ${video.outPath} \n`);
continue;
}
// Refresh the auth token before every video after the first when the user
// opted to keep login cookies between downloads.
if (argv.keepLoginCookies && index !== 0) {
logger.info('Trying to refresh token...');
session = await refreshSession('https://web.microsoftstream.com/video/' + videoGUIDs[index]);
ApiClient.getInstance().setSession(session);
}
// Progress bar measured in HLS chunks; bar width adapts to terminal width.
const pbar: cliProgress.SingleBar = new cliProgress.SingleBar({
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
format: 'progress [{bar}] {percentage}% {speed} {eta_formatted}',
// process.stdout.columns may return undefined in some terminals (Cygwin/MSYS)
barsize: Math.floor((process.stdout.columns || 30) / 3),
stopOnComplete: true,
hideCursor: true,
});
logger.info(`\nDownloading Video: ${video.title} \n`);
logger.verbose('Extra video info \n' +
'\t Video m3u8 playlist URL: '.cyan + video.playbackUrl + '\n' +
'\t Video tumbnail URL: '.cyan + video.posterImageUrl + '\n' +
'\t Video subtitle URL (may not exist): '.cyan + video.captionsUrl + '\n' +
'\t Video total chunks: '.cyan + video.totalChunks + '\n');
logger.info('Spawning ffmpeg with access token and HLS URL. This may take a few seconds...\n\n');
if (!process.stdout.columns) {
logger.warn(
'Unable to get number of columns from terminal.\n' +
'This happens sometimes in Cygwin/MSYS.\n' +
'No progress bar can be rendered, however the download process should not be affected.\n\n' +
'Please use PowerShell or cmd.exe to run destreamer on Windows.'
);
}
// Bearer token is forwarded to ffmpeg as an HTTP header for the HLS requests.
const headers: string = 'Authorization: Bearer ' + session.AccessToken;
// Render the video thumbnail in the terminal unless experiments are disabled.
if (!argv.noExperiments) {
if (video.posterImageUrl) {
await drawThumbnail(video.posterImageUrl, session);
}
}
const ffmpegInpt: any = new FFmpegInput(video.playbackUrl, new Map([
['headers', headers]
]));
// A codec of 'none' disables that stream entirely (-an / -vn); the bare
// 'n' option maps to ffmpeg's -n flag: never overwrite an existing output.
const ffmpegOutput: any = new FFmpegOutput(video.outPath, new Map([
argv.acodec === 'none' ? ['an', null] : ['c:a', argv.acodec],
argv.vcodec === 'none' ? ['vn', null] : ['c:v', argv.vcodec],
['n', null]
]));
const ffmpegCmd: any = new FFmpegCommand();
// On interrupt or ffmpeg error: stop the bar and (unless --noCleanup)
// delete the partially-downloaded output file.
const cleanupFn: () => void = () => {
pbar.stop();
if (argv.noCleanup) {
return;
}
try {
fs.unlinkSync(video.outPath);
}
catch (e) {
// Future handling of an error (maybe)
}
};
pbar.start(video.totalChunks, 0, {
speed: '0'
});
// prepare ffmpeg command line
ffmpegCmd.addInput(ffmpegInpt);
ffmpegCmd.addOutput(ffmpegOutput);
// Subtitles, when requested and present, are muxed in as a second input
// using the same auth header.
if (argv.closedCaptions && video.captionsUrl) {
const captionsInpt: any = new FFmpegInput(video.captionsUrl, new Map([
['headers', headers]
]));
ffmpegCmd.addInput(captionsInpt);
}
// Progress callback: convert ffmpeg's out_time timemark into a chunk count
// so the bar advances in the same units it was started with.
ffmpegCmd.on('update', async (data: any) => {
const currentChunks: number = ffmpegTimemarkToChunk(data.out_time);
pbar.update(currentChunks, {
speed: data.bitrate
});
// Graceful fallback in case we can't get columns (Cygwin/MSYS)
if (!process.stdout.columns) {
process.stdout.write(`--- Speed: ${data.bitrate}, Cursor: ${data.out_time}\r`);
}
});
// Registered per-video so Ctrl+C removes the partial file; the listener is
// removed again after the download completes (see below).
process.on('SIGINT', cleanupFn);
// let the magic begin...
await new Promise((resolve: any) => {
ffmpegCmd.on('error', (error: any) => {
cleanupFn();
logger.error(`FFmpeg returned an error: ${error.message}`);
process.exit(ERROR_CODE.UNK_FFMPEG_ERROR);
});
ffmpegCmd.on('success', () => {
pbar.update(video.totalChunks); // set progress bar to 100%
logger.info(`\nDownload finished: ${video.outPath} \n`);
resolve();
});
ffmpegCmd.spawn();
});
process.removeListener('SIGINT', cleanupFn);
}
}
async function main(): Promise<void> {
await init(); // must be first
let session: Session;
// eslint-disable-next-line prefer-const
session = tokenCache.Read() ?? await DoInteractiveLogin('https://web.microsoftstream.com/', argv.username);
logger.verbose('Session and API info \n' +
'\t API Gateway URL: '.cyan + session.ApiGatewayUri + '\n' +
'\t API Gateway version: '.cyan + session.ApiGatewayVersion + '\n');
let videoGUIDs: Array<string>;
let outDirs: Array<string>;
let streamVideos: Array<VideoUrl>, shareVideos: Array<VideoUrl>;
if (argv.videoUrls) {
logger.info('Parsing video/group urls');
[videoGUIDs, outDirs] = await parseCLIinput(argv.videoUrls as Array<string>, argv.outputDirectory, session);
[streamVideos, shareVideos] = await parseCLIinput(argv.videoUrls as Array<string>, argv.outputDirectory);
}
else {
logger.info('Parsing input file');
[videoGUIDs, outDirs] = await parseInputFile(argv.inputFile!, argv.outputDirectory, session);
[streamVideos, shareVideos] = await parseInputFile(argv.inputFile!, argv.outputDirectory);
}
logger.verbose('List of GUIDs and corresponding output directory \n' +
videoGUIDs.map((guid: string, i: number) =>
`\thttps://web.microsoftstream.com/video/${guid} => ${outDirs[i]} \n`).join(''));
streamVideos.map(video => `\t${video.url} => ${video.outDir} \n`).join(''));
downloadVideo(videoGUIDs, outDirs, session);
await downloadStreamVideo(streamVideos);
logger.debug(shareVideos);
}

View File

@@ -1,32 +1,14 @@
import { parseInputFile } from '../src/Utils';
import puppeteer from 'puppeteer';
import { extractStreamGuids, parseInputFile } from '../src/Utils';
import assert from 'assert';
import tmp from 'tmp';
import fs from 'fs';
import { Session } from './Types';
describe('Puppeteer', () => {
    it('should grab GitHub page title', async () => {
        // Flags are needed for CI containers (limited /dev/shm, no sandbox).
        const browser = await puppeteer.launch({
            headless: true,
            args: ['--disable-dev-shm-usage', '--fast-start', '--no-sandbox']
        });

        try {
            const page = await browser.newPage();
            await page.goto('https://github.com/', { waitUntil: 'load' });

            const pageTitle = await page.title();
            // assert.ok with a message beats assert.equal(true, ...): on
            // failure it reports the actual title instead of "false !== true".
            assert.ok(pageTitle.includes('GitHub'), `unexpected page title: ${pageTitle}`);
        }
        finally {
            // Always release the browser — without this, a failing assertion
            // or navigation error leaked the Chromium process.
            await browser.close();
        }
    }).timeout(30000); // yeah, this may take a while...
});
import { StreamSession, VideoUrl } from './Types';
// we cannot test groups parsing as that requires an actual session
// NOTE(review): this block is a merged diff with the +/- markers stripped —
// pre-refactor and post-refactor lines of the same test are interleaved, and a
// raw hunk header ("@@ -44,33 +26,42 @@") is embedded below, so the head of
// the `testIn` fixture is not visible in this excerpt. As written this is not
// valid TypeScript; it must be resolved against the actual commit.
describe('Destreamer parsing', () => {
// NOTE(review): two competing it(...) openers / testSession declarations —
// the first pair is the old (Session) version, the second the new
// (StreamSession) one; only one pair should survive the merge.
it('Input file to arrays of URLs and DIRs', async () => {
const testSession: Session = {
it('Input file to arrays of guids', async () => {
const testSession: StreamSession = {
AccessToken: '',
ApiGatewayUri: '',
ApiGatewayVersion: ''
@@ -44,33 +26,42 @@ describe('Destreamer parsing', () => {
// Tail of the testIn fixture — its head is lost in the hunk gap above.
'https://web.microsoftstream.com/video/xxxxxx-gggg-xxxx-xxxx-xxxxxxxxxxxx',
''
];
// Old expectations: two parallel arrays, GUIDs and output directories.
const expectedGUIDsOut: Array<string> = [
'xxxxxxxx-aaaa-xxxx-xxxx-xxxxxxxxxxxx',
'xxxxxxxx-bbbb-xxxx-xxxx-xxxxxxxxxxxx',
'xxxxxxxx-cccc-xxxx-xxxx-xxxxxxxxxxxx',
'xxxxxxxx-dddd-xxxx-xxxx-xxxxxxxxxxxx',
'xxxxxxxx-eeee-xxxx-xxxx-xxxxxxxxxxxx'
];
// NOTE(review): this old array has no closing `];` in the excerpt — it runs
// straight into the new expectedStreamOut declaration below.
const expectedDirOut: Array<string> = [
'videos',
'luca',
'videos',
'videos',
'videos'
// New expectation: one VideoUrl object ({ url, outDir }) per input entry.
const expectedStreamOut: Array<VideoUrl> = [
{
url: 'xxxxxxxx-aaaa-xxxx-xxxx-xxxxxxxxxxxx',
outDir: 'videos'
},
{
url: 'xxxxxxxx-bbbb-xxxx-xxxx-xxxxxxxxxxxx',
outDir: 'luca'
},
{
url: 'xxxxxxxx-cccc-xxxx-xxxx-xxxxxxxxxxxx',
outDir: 'videos'
},
{
url: 'xxxxxxxx-dddd-xxxx-xxxx-xxxxxxxxxxxx',
outDir: 'videos'
},
{
url: 'xxxxxxxx-eeee-xxxx-xxxx-xxxxxxxxxxxx',
outDir: 'videos'
},
];
// Fixture is written with CRLF joins to a temp .txt file.
const tmpFile = tmp.fileSync({ postfix: '.txt' });
fs.writeFileSync(tmpFile.fd, testIn.join('\r\n'));
// Old call shape: (path, outDir, session) awaited, returning [urls, dirs].
const [testUrlOut , testDirOut]: Array<Array<string>> = await parseInputFile(tmpFile.name, 'videos', testSession);
if (testUrlOut.length !== expectedGUIDsOut.length) {
throw "Expected url list and test list don't have the same number of elements".red;
}
else if (testDirOut.length !== expectedDirOut.length) {
throw "Expected dir list and test list don't have the same number of elements".red;
}
assert.deepStrictEqual(testUrlOut, expectedGUIDsOut,
'Error in parsing the URLs, missmatch between test and expected'.red);
// NOTE(review): copy-paste bug in the old assertion below — despite the
// "DIRs" message it re-checks testUrlOut against expectedGUIDsOut instead of
// testDirOut against expectedDirOut.
assert.deepStrictEqual(testUrlOut, expectedGUIDsOut,
'Error in parsing the DIRs, missmatch between test and expected'.red);
// New call shape: (path, outDir), not awaited, returning [stream, share];
// GUID extraction is a separate step that needs the session.
const [testStreamUrls]: Array<Array<VideoUrl>> = parseInputFile(tmpFile.name, 'videos');
assert.deepStrictEqual(
await extractStreamGuids(testStreamUrls, testSession),
expectedStreamOut,
'Error in parsing the URLs, missmatch between test and expected'.red
);
// assert.deepStrictEqual(testUrlOut, expectedGUIDsOut,
// 'Error in parsing the DIRs, missmatch between test and expected'.red);
// NOTE(review): assert.ok on a non-empty string literal always passes — this
// is a "reached the end" marker, not a real assertion.
assert.ok('Parsing of input file ok');
});
});