mirror of https://github.com/snobu/destreamer.git synced 2026-01-17 13:32:16 +00:00

video info and direct download from SharePoint

Luca Armaroli
2021-10-14 20:44:04 +02:00
parent 6a2159b266
commit f23d2a25fe
9 changed files with 447 additions and 129 deletions

View File: ApiClient.ts

@@ -1,8 +1,10 @@
import { logger } from './Logger';
import { StreamSession } from './Types';
import { ShareSession, StreamSession, Video } from './Types';
import { publishedDateToString, publishedTimeToString } from './VideoUtils';
import axios, { AxiosRequestConfig, AxiosResponse, AxiosInstance, AxiosError } from 'axios';
import axiosRetry, { isNetworkOrIdempotentRequestError } from 'axios-retry';
// import fs from 'fs';
export class StreamApiClient {
@@ -113,3 +115,118 @@ export class StreamApiClient {
});
}
}
export class ShareApiClient {
private axiosInstance: AxiosInstance;
private site: string;
public constructor(domain: string, site: string, session: ShareSession) {
this.axiosInstance = axios.create({
baseURL: domain,
// timeout: 7000,
headers: {
'User-Agent': 'destreamer/3.0 ALPHA',
'Cookie': `rtFa=${session.rtFa}; FedAuth=${session.FedAuth}`
}
});
this.site = site;
// FIXME: disabled because it was messing with the direct download check
// axiosRetry(this.axiosInstance, {
// // The following option is not working.
// // We should open an issue on the axios-retry GitHub repo
// shouldResetTimeout: true,
// retries: 6,
// retryDelay: (retryCount: number) => {
// return retryCount * 2000;
// },
// retryCondition: (err: AxiosError) => {
// const retryCodes: Array<number> = [429, 500, 502, 503];
// if (isNetworkOrIdempotentRequestError(err)) {
// logger.warn(`${err}. Retrying request...`);
// return true;
// }
// logger.warn(`Got HTTP code ${err?.response?.status ?? undefined}.`);
// logger.warn('Here is the error message: ');
// console.dir(err.response?.data);
// logger.warn('We called this URL: ' + err.response?.config.baseURL + err.response?.config.url);
// const shouldRetry: boolean = retryCodes.includes(err?.response?.status ?? 0);
// return shouldRetry;
// }
// });
}
public async getVideoInfo(filePath: string, outDir: string): Promise<Video> {
let playbackUrl: string;
// TODO: Ripped this straight from the Chromium inspector. Don't know, don't care what it is right now. Check later
const payload = {
parameters: {
__metadata: {
type: 'SP.RenderListDataParameters'
},
ViewXml: `<View Scope="RecursiveAll"><Query><Where><Eq><FieldRef Name="FileRef" /><Value Type="Text"><![CDATA[${filePath}]]></Value></Eq></Where></Query><RowLimit Paged="TRUE">1</RowLimit></View>`,
RenderOptions: 12295,
AddRequiredFields: true
}
};
const url = `${this.site}/_api/web/GetListUsingPath(DecodedUrl=@a1)/RenderListDataAsStream?@a1='${filePath}'`;
logger.verbose(`Requesting video info for '${url}'`);
const info = await this.axiosInstance.post(url, payload, {
headers: {
'Content-Type': 'application/json;odata=verbose'
}
}).then(res => res.data);
// fs.writeFileSync('info.json', JSON.stringify(info, null, 4));
// FIXME: very bad but useful in alpha stage to check for edge cases
if (info.ListData.Row.length !== 1) {
logger.error('Expected exactly 1 row in SharePoint video info', { fatal: true });
process.exit(1000);
}
const direct = await this.canDirectDownload(filePath);
if (direct) {
playbackUrl = this.axiosInstance.getUri({ url: filePath });
}
else {
playbackUrl = 'placeholder';
}
return {
direct,
title: filePath.split('/').pop() ?? 'video.mp4',
duration: '',
publishDate: publishedDateToString(info.ListData.Row[0]['Modified.']),
publishTime: publishedTimeToString(info.ListData.Row[0]['Modified.']),
author: info.ListData.Row[0]['Author.title'],
authorEmail: info.ListData.Row[0]['Author.email'],
uniqueId: info.ListData.Row[0]['GUID'].substring(1, 9),
outPath: outDir,
playbackUrl,
totalChunks: 0
};
}
private async canDirectDownload(filePath: string): Promise<boolean> {
logger.verbose(`Checking direct download for '${filePath}'`);
return this.axiosInstance.head(
filePath, { maxRedirects: 0 }
).then(
res => (res.status === 200)
).catch(
() => false
);
}
}
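
Illustration (not part of the commit): a minimal usage sketch of the new ShareApiClient. The domain, site, file path and output directory are hypothetical placeholders; the session comes from the new doShareLogin() shown further down in LoginModules.ts.

import { ShareApiClient } from './ApiClient';
import { doShareLogin } from './LoginModules';
import { Video } from './Types';

async function exampleShareVideoInfo(): Promise<void> {
    // Hypothetical SharePoint URL; doShareLogin() returns the rtFa/FedAuth cookie pair.
    const session = await doShareLogin('https://contoso.sharepoint.com/sites/ExampleSite/Shared%20Documents/lecture.mp4');

    const client = new ShareApiClient(
        'https://contoso.sharepoint.com',   // domain
        '/sites/ExampleSite',               // site
        session
    );

    // filePath is the decoded, server-relative path to the .mp4; 'videos' is the output directory
    const video: Video = await client.getVideoInfo(
        '/sites/ExampleSite/Shared Documents/lecture.mp4',
        'videos'
    );

    console.log(video.direct
        ? 'HEAD returned 200: direct download is possible'
        : 'no direct download: manifest path still TODO');
}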

View File: Downloaders.ts

@@ -1,16 +1,18 @@
import { StreamApiClient } from './ApiClient';
import { ShareApiClient, StreamApiClient } from './ApiClient';
import { argv } from './CommandLineParser';
import { ERROR_CODE } from './Errors';
import { logger } from './Logger';
import { doStreamLogin } from './LoginModules';
import { doShareLogin, doStreamLogin } from './LoginModules';
import { drawThumbnail } from './Thumbnail';
import { refreshSession, TokenCache } from './TokenCache';
import { StreamVideo, VideoUrl } from './Types';
import { Video, VideoUrl } from './Types';
import { ffmpegTimemarkToChunk } from './Utils';
import { createUniquePath, getStreamInfo } from './VideoUtils';
import cliProgress from 'cli-progress';
import fs from 'fs';
import { execSync } from 'child_process';
import path from 'path';
const { FFmpegCommand, FFmpegInput, FFmpegOutput } = require('@tedconf/fessonia')();
@@ -18,22 +20,23 @@ const tokenCache: TokenCache = new TokenCache();
export async function downloadStreamVideo(videoUrls: Array<VideoUrl>): Promise<void> {
logger.info('Downloading Microsoft Stream videos...');
let session = tokenCache.Read() ?? await doStreamLogin('https://web.microsoftstream.com/', tokenCache, argv.username);
logger.verbose('Session and API info \n' +
logger.verbose(
'Session and API info \n' +
'\t API Gateway URL: '.cyan + session.ApiGatewayUri + '\n' +
'\t API Gateway version: '.cyan + session.ApiGatewayVersion + '\n');
'\t API Gateway version: '.cyan + session.ApiGatewayVersion + '\n'
);
logger.info('Fetching videos info... \n');
const videos: Array<StreamVideo> = createUniquePath(
const videos: Array<Video> = createUniquePath(
await getStreamInfo(videoUrls, session, argv.closedCaptions),
argv.outputTemplate, argv.format, argv.skip
);
if (argv.simulate) {
videos.forEach((video: StreamVideo) => {
videos.forEach((video: Video) => {
logger.info(
'\nTitle: '.green + video.title +
'\nOutPath: '.green + video.outPath +
@@ -170,3 +173,70 @@ export async function downloadStreamVideo(videoUrls: Array<VideoUrl>): Promise<v
process.removeListener('SIGINT', cleanupFn);
}
}
// TODO: complete overhaul of this function
export async function downloadShareVideo(videoUrls: Array<VideoUrl>): Promise<void> {
const shareUrlRegex = new RegExp(/(?<domain>https:\/\/.+\.sharepoint\.com)(?<baseSite>\/sites\/.*?)(?:(?<filename>\/.*\.mp4)|\/.*id=(?<paramFilename>.*mp4))/);
logger.info('Downloading SharePoint videos...\n\n');
// FIXME: this may change; we need a smart login system if a request fails
const session = await doShareLogin(videoUrls[0].url, argv.username);
for (const videoUrl of videoUrls) {
const match = shareUrlRegex.exec(videoUrl.url);
if (!match) {
logger.error(`Invalid URL '${videoUrl.url}', skipping...`);
continue;
}
const shareDomain = match.groups!.domain;
const shareSite = match.groups!.baseSite;
const shareFilepath = decodeURIComponent(match.groups?.filename ? (shareSite + match.groups.filename) : match.groups!.paramFilename);
// FIXME: hardcoded video.mp4
const title = shareFilepath.split('/').pop()?.split('.')[0] ?? 'video';
const apiClient = new ShareApiClient(shareDomain, shareSite, session);
const video = await apiClient.getVideoInfo(shareFilepath, videoUrl.outDir);
createUniquePath(video, title, argv.format, argv.skip);
if (argv.simulate) {
if (argv.verbose) {
console.dir(video);
}
else {
logger.info(
'\nTitle: '.green + video.title +
'\nOutPath: '.green + video.outPath +
'\nPublished Date: '.green + video.publishDate +
'\nPlayback URL: '.green + video.playbackUrl
);
}
continue;
}
if (video.direct) {
const headers = `Cookie: rtFa=${session.rtFa}; FedAuth=${session.FedAuth}`;
// FIXME: unstable and bad all-around
try {
execSync(
'aria2c --max-connection-per-server 8 --console-log-level warn ' +
`--header "${headers}" --dir "${path.dirname(video.outPath)}" --out "${path.basename(video.outPath)}" "${shareDomain + shareFilepath}"`,
{ stdio: 'inherit' }
);
}
catch (error: any) {
logger.error(`${error.status} \n\n${error.message} \n\n${error.stdout.toString()} \n\n${error.stderr.toString()}`);
}
}
else {
logger.verbose('TODO: manifest download');
continue;
}
}
}
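
Illustration (not part of the commit): how shareUrlRegex splits a plain SharePoint file URL into its named groups. The sample URL is a hypothetical placeholder; the second alternative of the regex covers links that carry the file in an id= query parameter instead.

// Hypothetical sample URL, used only to show what each named group captures.
const shareUrlRegex = /(?<domain>https:\/\/.+\.sharepoint\.com)(?<baseSite>\/sites\/.*?)(?:(?<filename>\/.*\.mp4)|\/.*id=(?<paramFilename>.*mp4))/;
const sample = 'https://contoso.sharepoint.com/sites/ExampleSite/Shared%20Documents/lecture.mp4';

const match = shareUrlRegex.exec(sample);
console.log(match?.groups?.domain);    // https://contoso.sharepoint.com
console.log(match?.groups?.baseSite);  // /sites/ExampleSite
console.log(match?.groups?.filename);  // /Shared%20Documents/lecture.mp4

// downloadShareVideo() then rebuilds and decodes the server-relative path:
const filePath = decodeURIComponent(match!.groups!.baseSite + match!.groups!.filename);
console.log(filePath);                 // /sites/ExampleSite/Shared Documents/lecture.mp4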

View File: Errors.ts

@@ -3,6 +3,7 @@ export const enum ERROR_CODE {
ELEVATED_SHELL,
CANCELLED_USER_INPUT,
MISSING_FFMPEG,
MISSING_ARIA2,
OUTDATED_FFMPEG,
UNK_FFMPEG_ERROR,
INVALID_VIDEO_GUID,
@@ -22,7 +23,10 @@ export const errors: { [key: number]: string } = {
[ERROR_CODE.MISSING_FFMPEG]: 'FFmpeg is missing!\n' +
'Destreamer requires a fairly recent release of FFmpeg to download videos',
[ERROR_CODE.MISSING_FFMPEG]: 'The FFmpeg version currently installed is too old!\n' +
[ERROR_CODE.MISSING_ARIA2]: 'Aria2 is missing!\n' +
'Destreamer requires a fairly recent release of Aria2 to download videos',
[ERROR_CODE.OUTDATED_FFMPEG]: 'The FFmpeg version currently installed is too old!\n' +
'Destreamer requires a fairly recent release of FFmpeg to download videos',
[ERROR_CODE.UNK_FFMPEG_ERROR]: 'Unknown FFmpeg error',

View File: LoginModules.ts

@@ -3,82 +3,171 @@ import puppeteer from 'puppeteer';
import { getPuppeteerChromiumPath } from './PuppeteerHelper';
import { chromeCacheFolder } from './destreamer';
import { argv } from './CommandLineParser';
import { StreamSession } from './Types';
import { ShareSession, StreamSession } from './Types';
import { ERROR_CODE } from './Errors';
import { TokenCache } from './TokenCache';
export async function doStreamLogin(url: string, tokenCache: TokenCache, username?: string): Promise<StreamSession> {
logger.info('Launching headless Chrome to perform the OpenID Connect dance...');
const browser: puppeteer.Browser = await puppeteer.launch({
executablePath: getPuppeteerChromiumPath(),
headless: false,
userDataDir: (argv.keepLoginCookies) ? chromeCacheFolder : undefined,
defaultViewport: null,
args: [
'--disable-dev-shm-usage',
'--fast-start',
'--no-sandbox'
]
});
const page: puppeteer.Page = (await browser.pages())[0];
logger.info('Navigating to login page...');
await page.goto(url, { waitUntil: 'load' });
// try-finally because we were leaving zombie processes if there was an error
try {
if (username) {
await page.waitForSelector('input[type="email"]', { timeout: 3000 });
await page.keyboard.type(username);
await page.click('input[type="submit"]');
}
else {
/* If a username was not provided we let the user take actions that
lead up to the video page. */
}
}
catch (e) {
/* If there is no email input selector we aren't in the login module,
we are probably using the cache to aid the login.
It could finish the login on its own if the user said 'yes' when asked to
remember the credentials or it could still prompt the user for a password */
}
const page: puppeteer.Page = (await browser.pages())[0];
await browser.waitForTarget((target: puppeteer.Target) => target.url().endsWith('microsoftstream.com/'), { timeout: 150000 });
logger.info('We are logged in.');
logger.info('Navigating to login page...');
await page.goto(url, { waitUntil: 'load' });
let session: StreamSession | null = null;
let tries = 1;
while (!session) {
try {
let sessionInfo: any;
session = await page.evaluate(
() => {
return {
AccessToken: sessionInfo.AccessToken,
ApiGatewayUri: sessionInfo.ApiGatewayUri,
ApiGatewayVersion: sessionInfo.ApiGatewayVersion
};
}
);
if (username) {
await page.waitForSelector('input[type="email"]', { timeout: 3000 });
await page.keyboard.type(username);
await page.click('input[type="submit"]');
}
else {
/* If a username was not provided we let the user take actions that
lead up to the video page. */
}
}
catch (error) {
if (tries > 5) {
process.exit(ERROR_CODE.NO_SESSION_INFO);
catch (e) {
/* If there is no email input selector we aren't in the login module,
we are probably using the cache to aid the login.
It could finish the login on its own if the user said 'yes' when asked to
remember the credentials or it could still prompt the user for a password */
}
await browser.waitForTarget((target: puppeteer.Target) => target.url().endsWith('microsoftstream.com/'), { timeout: 150000 });
logger.info('We are logged in.');
let session: StreamSession | null = null;
let tries = 1;
while (!session) {
try {
let sessionInfo: any;
session = await page.evaluate(
() => {
return {
AccessToken: sessionInfo.AccessToken,
ApiGatewayUri: sessionInfo.ApiGatewayUri,
ApiGatewayVersion: sessionInfo.ApiGatewayVersion
};
}
);
}
catch (error) {
if (tries > 5) {
process.exit(ERROR_CODE.NO_SESSION_INFO);
}
session = null;
tries++;
await page.waitForTimeout(3000);
}
}
tokenCache.Write(session);
logger.info('Wrote access token to token cache.');
logger.info("At this point Chromium's job is done, shutting it down...\n");
return session;
}
finally {
await browser.close();
}
}
export async function doShareLogin(url: string, username?: string): Promise<ShareSession> {
logger.info('Launching headless Chrome to perform the OpenID Connect dance...');
const hostname = new URL(url).hostname;
const browser: puppeteer.Browser = await puppeteer.launch({
executablePath: getPuppeteerChromiumPath(),
headless: false,
devtools: argv.verbose,
userDataDir: (argv.keepLoginCookies) ? chromeCacheFolder : undefined,
defaultViewport: null,
args: [
'--disable-dev-shm-usage',
'--fast-start',
'--no-sandbox'
]
});
// try-finally because we were leaving zombie processes if there was an error
try {
const page: puppeteer.Page = (await browser.pages())[0];
logger.info('Navigating to login page...');
await page.goto(url, { waitUntil: 'load' });
try {
if (username) {
await page.waitForSelector('input[type="email"]', { timeout: 3000 });
await page.keyboard.type(username);
await page.click('input[type="submit"]');
}
else {
/* If a username was not provided we let the user take actions that
lead up to the video page. */
}
}
catch (e) {
/* If there is no email input selector we aren't in the login module,
we are probably using the cache to aid the login.
It could finish the login on its own if the user said 'yes' when asked to
remember the credentials or it could still prompt the user for a password */
}
logger.info('Waiting for target!');
await browser.waitForTarget((target: puppeteer.Target) => target.url().includes(hostname), { timeout: 150000 });
logger.info('We are logged in.');
let session: ShareSession | null = null;
let tries = 1;
while (!session) {
const cookieJar = (await page.cookies()).filter(
biscuit => biscuit.name == 'rtFa' || biscuit.name == 'FedAuth'
);
if (cookieJar.length != 2) {
if (tries > 5) {
process.exit(ERROR_CODE.NO_SESSION_INFO);
}
await page.waitForTimeout(1000 * tries++);
continue;
}
session = null;
tries++;
await page.waitForTimeout(3000);
session = {
rtFa: cookieJar.find(biscuit => biscuit.name == 'rtFa')!.value,
FedAuth: cookieJar.find(biscuit => biscuit.name == 'FedAuth')!.value
};
}
logger.info("At this point Chromium's job is done, shutting it down...\n");
// await page.waitForTimeout(1000 * 60 * 60 * 60);
return session;
}
finally {
await browser.close();
}
tokenCache.Write(session);
logger.info('Wrote access token to token cache.');
logger.info("At this point Chromium's job is done, shutting it down...\n");
await browser.close();
return session;
}

View File: Types.ts

@@ -5,6 +5,12 @@ export type StreamSession = {
}
export type ShareSession = {
FedAuth: string;
rtFa: string;
}
export type VideoUrl = {
url: string,
outDir: string
@@ -20,8 +26,9 @@ export type SharepointVideo = {
}
export type StreamVideo = {
guid: string;
export type Video = {
guid?: string;
direct?: boolean;
title: string;
duration: string;
publishDate: string;
@@ -32,7 +39,7 @@ export type StreamVideo = {
outPath: string;
totalChunks: number; // Abstraction of FFmpeg timemark
playbackUrl: string;
posterImageUrl: string | null;
posterImageUrl?: string;
captionsUrl?: string
}
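
Illustration (not part of the commit): a SharePoint entry in the merged Video type, mirroring the object that ShareApiClient.getVideoInfo() returns. guid stays unset for SharePoint videos, direct unset for Stream ones; all field values below are hypothetical.

import { Video } from './Types';

const shareVideo: Video = {
    direct: true,                        // set by ShareApiClient.canDirectDownload()
    title: 'lecture.mp4',
    duration: '',
    publishDate: '14/10/2021',
    publishTime: '20:44',
    author: 'Jane Doe',
    authorEmail: 'jane.doe@contoso.com',
    uniqueId: '12345678',
    outPath: 'videos/lecture.mp4',
    playbackUrl: 'https://contoso.sharepoint.com/sites/ExampleSite/Shared%20Documents/lecture.mp4',
    totalChunks: 0                       // only meaningful for the FFmpeg/Stream path
};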

View File: Utils.ts

@@ -16,8 +16,6 @@ const shareUrlRegex = new RegExp(/https?:\/\/.+\.sharepoint\.com.*/);
export async function extractStreamGuids(urlList: Array<VideoUrl>, session: StreamSession): Promise<Array<VideoUrl>> {
const videoRegex = new RegExp(/https:\/\/.*\/video\/(\w{8}-(?:\w{4}-){3}\w{12})/);
const groupRegex = new RegExp(/https:\/\/.*\/group\/(\w{8}-(?:\w{4}-){3}\w{12})/);
// const sharepointDirect = new RegExp(/https:\/\/(?<hostname>.+\.sharepoint\.com)\/(?:.*\/)?(?<filename>.*\.mp4)/);
// const sharepointEncoded = new RegExp(/https:\/\/(?<hostname>.+\.sharepoint\.com)\/.*id=(?<encodedFilename>.*mp4)/);
const apiClient: StreamApiClient = StreamApiClient.getInstance(session);
const guidList: Array<VideoUrl> = [];
@@ -213,6 +211,21 @@ export function checkRequirements(): void {
catch (e) {
process.exit(ERROR_CODE.MISSING_FFMPEG);
}
try {
const versionRegex = new RegExp(/aria2 version (.*)/);
const aria2Ver: string = execSync('aria2c --version').toString().split('\n')[0];
if (versionRegex.test(aria2Ver)) {
logger.verbose(`Using ${aria2Ver}\n`);
}
else {
throw new Error();
}
}
catch (e) {
process.exit(ERROR_CODE.MISSING_ARIA2);
}
}

View File: VideoUtils.ts

@@ -1,7 +1,7 @@
import { StreamApiClient } from './ApiClient';
import { promptUser } from './CommandLineParser';
import { logger } from './Logger';
import { StreamVideo, StreamSession, VideoUrl } from './Types';
import { Video, StreamSession, VideoUrl } from './Types';
import { AxiosResponse } from 'axios';
import fs from 'fs';
@@ -10,7 +10,7 @@ import path from 'path';
import sanitizeWindowsName from 'sanitize-filename';
import { extractStreamGuids } from './Utils';
function publishedDateToString(date: string): string {
export function publishedDateToString(date: string): string {
const dateJs: Date = new Date(date);
const day: string = dateJs.getDate().toString().padStart(2, '0');
const month: string = (dateJs.getMonth() + 1).toString(10).padStart(2, '0');
@@ -19,7 +19,7 @@ function publishedDateToString(date: string): string {
}
function publishedTimeToString(date: string): string {
export function publishedTimeToString(date: string): string {
const dateJs: Date = new Date(date);
const hours: string = dateJs.getHours().toString();
const minutes: string = dateJs.getMinutes().toString();
@@ -46,8 +46,8 @@ function durationToTotalChunks(duration: string): number {
}
export async function getStreamInfo(videoUrls: Array<VideoUrl>, session: StreamSession, subtitles?: boolean): Promise<Array<StreamVideo>> {
const metadata: Array<StreamVideo> = [];
export async function getStreamInfo(videoUrls: Array<VideoUrl>, session: StreamSession, subtitles?: boolean): Promise<Array<Video>> {
const metadata: Array<Video> = [];
let title: string;
let duration: string;
let publishDate: string;
@@ -137,16 +137,24 @@ export async function getStreamInfo(videoUrls: Array<VideoUrl>, session: StreamS
}
export function createUniquePath(videos: Array<StreamVideo>, template: string, format: string, skip?: boolean): Array<StreamVideo> {
export function createUniquePath(videos: Array<Video>, template: string, format: string, skip?: boolean): Array<Video>
export function createUniquePath(videos: Video, template: string, format: string, skip?: boolean): Video
export function createUniquePath(videos: Array<Video> | Video, template: string, format: string, skip?: boolean): Array<Video> | Video {
let singleInput = false;
videos.forEach((video: StreamVideo) => {
if (!Array.isArray(videos)) {
videos = [videos];
singleInput = true;
}
videos.forEach((video: Video) => {
let title: string = template;
let finalTitle: string;
const elementRegEx = RegExp(/{(.*?)}/g);
let match = elementRegEx.exec(template);
while (match) {
const value = video[match[1] as keyof StreamVideo] as string;
const value = video[match[1] as keyof (Video)] as string;
title = title.replace(match[0], value);
match = elementRegEx.exec(template);
}
@@ -168,5 +176,9 @@ export function createUniquePath(videos: Array<StreamVideo>, template: string, f
});
if (singleInput) {
return videos[0];
}
return videos;
}
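
Illustration (not part of the commit): how the new createUniquePath() overloads are meant to be called. The inputs are hypothetical placeholders; 'mp4' stands in for whatever argv.format carries.

import { createUniquePath } from './VideoUtils';
import { Video } from './Types';

declare const streamVideos: Array<Video>;  // e.g. the result of getStreamInfo()
declare const shareVideo: Video;           // e.g. the result of ShareApiClient.getVideoInfo()

// Array overload (Microsoft Stream path): array in, array out.
const many: Array<Video> = createUniquePath(streamVideos, '{title}', 'mp4');

// Single-video overload (SharePoint path): one Video in, one Video out.
const one: Video = createUniquePath(shareVideo, 'lecture', 'mp4', true);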

View File: destreamer.ts

@@ -6,7 +6,7 @@ import { VideoUrl } from './Types';
import { checkRequirements, parseInputFile, parseCLIinput } from './Utils';
import isElevated from 'is-elevated';
import { downloadStreamVideo } from './Downloaders';
import { downloadShareVideo, downloadStreamVideo } from './Downloaders';
export const chromeCacheFolder = '.chrome_data';
@@ -48,13 +48,18 @@ async function main(): Promise<void> {
[streamVideos, shareVideos] = await parseInputFile(argv.inputFile!, argv.outputDirectory);
}
logger.verbose('List of GUIDs and corresponding output directory \n' +
streamVideos.map(video => `\t${video.url} => ${video.outDir} \n`).join(''));
logger.verbose(
'List of urls and corresponding output directory \n' +
streamVideos.map(video => `\t${video.url} => ${video.outDir} \n`).join('') +
shareVideos.map(video => `\t${video.url} => ${video.outDir} \n`).join('')
);
await downloadStreamVideo(streamVideos);
logger.debug(shareVideos);
if (streamVideos.length) {
await downloadStreamVideo(streamVideos);
}
if (shareVideos.length) {
await downloadShareVideo(shareVideos);
}
}