subtitle parsing progress, still a bit buggy

This commit is contained in:
ThaUnknown 2020-09-15 01:57:55 +02:00
parent a868626af1
commit 792021bcb4
5 changed files with 229 additions and 211 deletions

View file

@ -306,4 +306,4 @@ async function rssFetch(url) {
alRequest()
search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
// search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/

View file

@ -138,7 +138,7 @@
<div class="overflow-y-hidden content-wrapper">
<section id="player">
<a href="#player" class="w-full h-full"></a>
<video id="video" class="w-full" autoPictureInPicture preload>
<video id="video" class="w-full" autoPictureInPicture>
</video>
<div class="player d-none flex-column justify-content-between w-full h-full">
<div class="h-full w-full d-flex flex-column justify-content-between" id="ptoggle">

View file

@ -13068,229 +13068,246 @@ if (typeof Object.create === 'function') {
},{}],14:[function(require,module,exports){
(function (Buffer){
const Transform = require('readable-stream').Transform
const ebml = require('ebml')
const ebmlBlock = require('ebml-block')
const readElement = require('./lib/read-element')
// track elements we care about
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
const CUES_ID = Buffer.from('1C53BB6B', 'hex')
class MatroskaSubtitles extends Transform {
constructor ({ prevInstance, offset } = {}) {
super()
let currentTrack = null
let currentSubtitleBlock = null
let currentClusterTimecode = null
let currentSeekID = null
this.decoder = new ebml.Decoder()
if (prevInstance instanceof MatroskaSubtitles) {
if (offset == null) throw new Error('no offset')
prevInstance.once('drain', () => {
// prevInstance.end()
// console.log('prevInstance drain')
})
// copy previous metadata
this.subtitleTracks = prevInstance.subtitleTracks
this.timecodeScale = prevInstance.timecodeScale
this.cues = prevInstance.cues
if (!this.cues) {
return console.warn('No cues.')
}
// find a cue that's close to the file offset
// const cueArray = Uint32Array.from(this.cues.positions)
// cueArray.sort()
const cueArray = Array.from(this.cues.positions)
cueArray.sort((a, b) => a - b)
const closestCue = cueArray.find(i => i >= offset)
if (closestCue != null) {
// prepare to skip file stream until we hit a cue position
this.skip = closestCue - offset
// set internal decoder position to output consistent file offsets
this.decoder.total = closestCue
// console.log('using cue:', closestCue)
this.decoder.on('data', _onMetaData.bind(this))
} else {
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
}
} else {
if (offset) return console.error(`Offset is ${offset}, and must be 0 for initial instance!`)
this.subtitleTracks = new Map()
this.timecodeScale = 1
this.decoder.on('data', _onMetaData.bind(this))
}
let waitForNext = false
function _onMetaData (chunk) {
if (waitForNext) {
waitForNext = false
// Keep cues if this is the same segment
if (!this.cues || this.cues.start !== chunk[1].start) {
// Add 0 as a valid cue point
this.cues = { start: chunk[1].start, positions: new Set([0]) }
const Transform = require('readable-stream').Transform
const ebml = require('ebml')
const ebmlBlock = require('ebml-block')
const readElement = require('./lib/read-element')
// track elements we care about
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
const CUES_ID = Buffer.from('1C53BB6B', 'hex')
class MatroskaSubtitles extends Transform {
constructor ({ prevInstance, offset } = {}) {
super()
let currentTrack = null
let currentSubtitleBlock = null
let currentClusterTimecode = null
let currentSeekID = null
let waitForNext = false
this.decoder = new ebml.Decoder()
if (prevInstance instanceof MatroskaSubtitles) {
if (offset == null) throw new Error('no offset')
prevInstance.once('drain', () => {
// prevInstance.end()
console.log('prevInstance drained')
})
if (offset === 0) {
// just begin normal parsing
this.subtitleTracks = prevInstance.subtitleTracks || new Map()
this.timecodeScale = prevInstance.timecodeScale || 1
this.cues = prevInstance.cues
this.decoder.on('data', _onMetaData.bind(this))
return
}
// copy previous metadata
this.subtitleTracks = prevInstance.subtitleTracks
this.timecodeScale = prevInstance.timecodeScale
this.cues = prevInstance.cues
if (!this.cues) {
this.decoder = null
return console.warn('No cues was parsed. Subtitle parsing disabled.')
}
// find a cue that's close to the file offset
// const cueArray = Uint32Array.from(this.cues.positions)
// cueArray.sort()
const cueArray = Array.from(this.cues.positions)
cueArray.sort((a, b) => a - b)
const closestCue = cueArray.find(i => i >= offset)
if (closestCue != null) {
// prepare to skip file stream until we hit a cue position
this.skip = closestCue - offset
// set internal decoder position to output consistent file offsets
this.decoder.total = closestCue
// console.log('using cue:', closestCue)
this.decoder.on('data', _onMetaData.bind(this))
} else {
this.decoder = null
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
}
} else {
if (offset) {
this.decoder = null
console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
return
}
this.subtitleTracks = new Map()
this.timecodeScale = 1
this.decoder.on('data', _onMetaData.bind(this))
}
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
// TODO: only record first segment?
// TODO: find a simpler way to do this
waitForNext = true
}
if (chunk[1].name === 'SeekID') {
// TODO: .value is undefined for some reason?
currentSeekID = chunk[1].data
}
if (currentSeekID && chunk[1].name === 'SeekPosition') {
if (CUES_ID.equals(currentSeekID)) {
// hack: this is not a cue position, but the position to the cue data itself,
// in case it's not located at the beginning of the file.
function _onMetaData (chunk) {
if (waitForNext) {
waitForNext = false
// Keep cues if this is the same segment
if (!this.cues || this.cues.start !== chunk[1].start) {
this.cues = { start: chunk[1].start, positions: new Set() }
}
}
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
// TODO: only record first segment?
// TODO: find a simpler way to do this
waitForNext = true
}
if (chunk[1].name === 'SeekID') {
// TODO: .value is undefined for some reason?
currentSeekID = chunk[1].data
}
if (currentSeekID && chunk[1].name === 'SeekPosition') {
if (CUES_ID.equals(currentSeekID)) {
// hack: this is not a cue position, but the position to the cue data itself,
// in case it's not located at the beginning of the file.
this.cues.positions.add(this.cues.start + chunk[1].value)
}
}
if (chunk[1].name === 'CueClusterPosition') {
this.cues.positions.add(this.cues.start + chunk[1].value)
}
}
if (chunk[1].name === 'CueClusterPosition') {
this.cues.positions.add(this.cues.start + chunk[1].value)
}
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
this.emit('cues')
}
// Segment Information
if (chunk[1].name === 'TimecodeScale') {
this.timecodeScale = readElement(chunk[1]) / 1000000
}
// Tracks
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
currentTrack = {}
}
if (currentTrack && chunk[0] === 'tag') {
// save info about track currently being scanned
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
currentTrack[chunk[1].name] = readElement(chunk[1])
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
this.emit('cues')
}
}
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
if (currentTrack.TrackType === 0x11) { // Subtitle Track
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
const track = {
number: currentTrack.TrackNumber,
language: currentTrack.Language,
type: currentTrack.CodecID.substring(7).toLowerCase()
}
if (currentTrack.CodecPrivate) {
// only SSA/ASS
track.header = currentTrack.CodecPrivate.toString('utf8')
}
this.subtitleTracks.set(currentTrack.TrackNumber, track)
// Segment Information
if (chunk[1].name === 'TimecodeScale') {
this.timecodeScale = readElement(chunk[1]) / 1000000
}
// Tracks
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
currentTrack = {}
}
if (currentTrack && chunk[0] === 'tag') {
// save info about track currently being scanned
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
currentTrack[chunk[1].name] = readElement(chunk[1])
}
}
currentTrack = null
}
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
// this.decoder.removeListener('data', _onMetaData)
// if (this.subtitleTracks.size <= 0) return this.end()
// this.decoder.on('data', _onClusterData)
this.emit('tracks', Array.from(this.subtitleTracks.values()))
}
// }
// function _onClusterData (chunk) {
// TODO: assuming this is a Cluster `Timecode`
if (chunk[1].name === 'Timecode') {
currentClusterTimecode = readElement(chunk[1])
}
if (chunk[1].name === 'Block') {
const block = ebmlBlock(chunk[1].data)
if (this.subtitleTracks.has(block.trackNumber)) {
const type = this.subtitleTracks.get(block.trackNumber).type
const subtitle = {
text: block.frames[0].toString('utf8'),
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
}
if (type === 'ass' || type === 'ssa') {
// extract SSA/ASS keys
const values = subtitle.text.split(',')
// ignore read-order, and skip layer if ssa
let i = type === 'ssa' ? 2 : 1
for (; i < 9; i++) {
subtitle[ASS_KEYS[i]] = values[i]
}
// re-append extra text that might have been split
for (i = 9; i < values.length; i++) {
subtitle.text += ',' + values[i]
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
if (currentTrack.TrackType === 0x11) { // Subtitle Track
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
const track = {
number: currentTrack.TrackNumber,
language: currentTrack.Language,
type: currentTrack.CodecID.substring(7).toLowerCase()
}
if (currentTrack.CodecPrivate) {
// only SSA/ASS
track.header = currentTrack.CodecPrivate.toString('utf8')
}
this.subtitleTracks.set(currentTrack.TrackNumber, track)
}
}
currentSubtitleBlock = [subtitle, block.trackNumber]
currentTrack = null
}
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
// this.decoder.removeListener('data', _onMetaData)
// if (this.subtitleTracks.size <= 0) return this.end()
// this.decoder.on('data', _onClusterData)
this.emit('tracks', Array.from(this.subtitleTracks.values()))
}
// }
// function _onClusterData (chunk) {
// TODO: assuming this is a Cluster `Timecode`
if (chunk[1].name === 'Timecode') {
currentClusterTimecode = readElement(chunk[1])
}
if (chunk[1].name === 'Block') {
const block = ebmlBlock(chunk[1].data)
if (this.subtitleTracks.has(block.trackNumber)) {
const type = this.subtitleTracks.get(block.trackNumber).type
const subtitle = {
text: block.frames[0].toString('utf8'),
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
}
if (type === 'ass' || type === 'ssa') {
// extract SSA/ASS keys
const values = subtitle.text.split(',')
// ignore read-order, and skip layer if ssa
let i = type === 'ssa' ? 2 : 1
for (; i < 9; i++) {
subtitle[ASS_KEYS[i]] = values[i]
}
// re-append extra text that might have been split
for (i = 9; i < values.length; i++) {
subtitle.text += ',' + values[i]
}
}
currentSubtitleBlock = [subtitle, block.trackNumber]
}
}
// TODO: assuming `BlockDuration` exists and always comes after `Block`
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
this.emit('subtitle', ...currentSubtitleBlock)
currentSubtitleBlock = null
}
}
// TODO: assuming `BlockDuration` exists and always comes after `Block`
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
this.emit('subtitle', ...currentSubtitleBlock)
currentSubtitleBlock = null
}
}
}
_transform (chunk, _, callback) {
if (this.skip) {
// skip bytes to reach cue position
if (this.skip < chunk.length) {
// slice chunk
const sc = chunk.slice(this.skip)
this.skip = 0
this.decoder.write(sc)
_transform (chunk, _, callback) {
if (!this.decoder) return callback(null, chunk)
if (this.skip) {
// skip bytes to reach cue position
if (this.skip < chunk.length) {
// slice chunk
const sc = chunk.slice(this.skip)
this.skip = 0
this.decoder.write(sc)
} else {
// skip entire chunk
this.skip -= chunk.length
}
} else {
// skip entire chunk
this.skip -= chunk.length
this.decoder.write(chunk)
}
} else {
this.decoder.write(chunk)
callback(null, chunk)
}
callback(null, chunk)
}
}
module.exports = MatroskaSubtitles
module.exports = MatroskaSubtitles
}).call(this,require("buffer").Buffer)
},{"./lib/read-element":15,"buffer":3,"ebml":12,"ebml-block":9,"readable-stream":31}],15:[function(require,module,exports){

View file

@ -89,6 +89,7 @@ video[src=""] {
}
video::cue {
background: none;
font-family: "Open Sans", sans-serif;
color: #fff;
text-shadow: 2px 2px 0 #000,
@ -100,7 +101,6 @@ video::cue {
-2px 0px 0 #000,
0px -2px 0 #000,
2px 2px 2px #000;
background: none;
}
.controls {

View file

@ -74,7 +74,7 @@ function addTorrent(magnet) {
alertType: "alert-success",
fillType: ""
});
finishThumbnails();
// finishThumbnails(); // temporarily disabled for performance and testing reasons
})
torrent.on('noPeers', function () {
halfmoon.initStickyAlert({
@ -130,6 +130,7 @@ function serveFile(file, req) {
res.headers['Cache-Control'] = 'no-store'
res.body = req.method === 'HEAD' ? '' : 'stream'
console.log('set parser', range)
parser = new MatroskaSubtitles({ prevInstance: parser, offset: range.start })