mirror of
https://github.com/ThaUnknown/miru.git
synced 2026-03-29 02:48:43 +00:00
subtitle parsing progress, still a bit buggy
This commit is contained in:
parent
a868626af1
commit
792021bcb4
5 changed files with 229 additions and 211 deletions
|
|
@ -306,4 +306,4 @@ async function rssFetch(url) {
|
|||
|
||||
alRequest()
|
||||
|
||||
search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
|
||||
// search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
|
||||
|
|
@ -138,7 +138,7 @@
|
|||
<div class="overflow-y-hidden content-wrapper">
|
||||
<section id="player">
|
||||
<a href="#player" class="w-full h-full"></a>
|
||||
<video id="video" class="w-full" autoPictureInPicture preload>
|
||||
<video id="video" class="w-full" autoPictureInPicture>
|
||||
</video>
|
||||
<div class="player d-none flex-column justify-content-between w-full h-full">
|
||||
<div class="h-full w-full d-flex flex-column justify-content-between" id="ptoggle">
|
||||
|
|
|
|||
431
app/bundle.js
431
app/bundle.js
|
|
@ -13068,229 +13068,246 @@ if (typeof Object.create === 'function') {
|
|||
|
||||
},{}],14:[function(require,module,exports){
|
||||
(function (Buffer){
|
||||
const Transform = require('readable-stream').Transform
|
||||
const ebml = require('ebml')
|
||||
const ebmlBlock = require('ebml-block')
|
||||
const readElement = require('./lib/read-element')
|
||||
|
||||
// track elements we care about
|
||||
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
|
||||
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
|
||||
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
|
||||
|
||||
const CUES_ID = Buffer.from('1C53BB6B', 'hex')
|
||||
|
||||
class MatroskaSubtitles extends Transform {
|
||||
constructor ({ prevInstance, offset } = {}) {
|
||||
super()
|
||||
|
||||
let currentTrack = null
|
||||
let currentSubtitleBlock = null
|
||||
let currentClusterTimecode = null
|
||||
|
||||
let currentSeekID = null
|
||||
|
||||
this.decoder = new ebml.Decoder()
|
||||
|
||||
if (prevInstance instanceof MatroskaSubtitles) {
|
||||
if (offset == null) throw new Error('no offset')
|
||||
|
||||
prevInstance.once('drain', () => {
|
||||
// prevInstance.end()
|
||||
// console.log('prevInstance drain')
|
||||
})
|
||||
|
||||
// copy previous metadata
|
||||
this.subtitleTracks = prevInstance.subtitleTracks
|
||||
this.timecodeScale = prevInstance.timecodeScale
|
||||
this.cues = prevInstance.cues
|
||||
|
||||
if (!this.cues) {
|
||||
return console.warn('No cues.')
|
||||
}
|
||||
|
||||
// find a cue that's close to the file offset
|
||||
// const cueArray = Uint32Array.from(this.cues.positions)
|
||||
// cueArray.sort()
|
||||
const cueArray = Array.from(this.cues.positions)
|
||||
cueArray.sort((a, b) => a - b)
|
||||
|
||||
const closestCue = cueArray.find(i => i >= offset)
|
||||
|
||||
if (closestCue != null) {
|
||||
// prepare to skip file stream until we hit a cue position
|
||||
this.skip = closestCue - offset
|
||||
// set internal decoder position to output consistent file offsets
|
||||
this.decoder.total = closestCue
|
||||
|
||||
// console.log('using cue:', closestCue)
|
||||
|
||||
this.decoder.on('data', _onMetaData.bind(this))
|
||||
} else {
|
||||
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
|
||||
}
|
||||
} else {
|
||||
if (offset) return console.error(`Offset is ${offset}, and must be 0 for initial instance!`)
|
||||
|
||||
this.subtitleTracks = new Map()
|
||||
this.timecodeScale = 1
|
||||
|
||||
this.decoder.on('data', _onMetaData.bind(this))
|
||||
}
|
||||
|
||||
let waitForNext = false
|
||||
|
||||
function _onMetaData (chunk) {
|
||||
if (waitForNext) {
|
||||
waitForNext = false
|
||||
// Keep cues if this is the same segment
|
||||
if (!this.cues || this.cues.start !== chunk[1].start) {
|
||||
// Add 0 as a valid cue point
|
||||
this.cues = { start: chunk[1].start, positions: new Set([0]) }
|
||||
const Transform = require('readable-stream').Transform
|
||||
const ebml = require('ebml')
|
||||
const ebmlBlock = require('ebml-block')
|
||||
const readElement = require('./lib/read-element')
|
||||
|
||||
// track elements we care about
|
||||
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
|
||||
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
|
||||
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
|
||||
|
||||
const CUES_ID = Buffer.from('1C53BB6B', 'hex')
|
||||
|
||||
class MatroskaSubtitles extends Transform {
|
||||
constructor ({ prevInstance, offset } = {}) {
|
||||
super()
|
||||
|
||||
let currentTrack = null
|
||||
let currentSubtitleBlock = null
|
||||
let currentClusterTimecode = null
|
||||
|
||||
let currentSeekID = null
|
||||
|
||||
let waitForNext = false
|
||||
|
||||
this.decoder = new ebml.Decoder()
|
||||
|
||||
if (prevInstance instanceof MatroskaSubtitles) {
|
||||
if (offset == null) throw new Error('no offset')
|
||||
|
||||
prevInstance.once('drain', () => {
|
||||
// prevInstance.end()
|
||||
console.log('prevInstance drained')
|
||||
})
|
||||
|
||||
if (offset === 0) {
|
||||
// just begin normal parsing
|
||||
this.subtitleTracks = prevInstance.subtitleTracks || new Map()
|
||||
this.timecodeScale = prevInstance.timecodeScale || 1
|
||||
this.cues = prevInstance.cues
|
||||
|
||||
this.decoder.on('data', _onMetaData.bind(this))
|
||||
return
|
||||
}
|
||||
|
||||
// copy previous metadata
|
||||
this.subtitleTracks = prevInstance.subtitleTracks
|
||||
this.timecodeScale = prevInstance.timecodeScale
|
||||
this.cues = prevInstance.cues
|
||||
|
||||
if (!this.cues) {
|
||||
this.decoder = null
|
||||
return console.warn('No cues was parsed. Subtitle parsing disabled.')
|
||||
}
|
||||
|
||||
// find a cue that's close to the file offset
|
||||
// const cueArray = Uint32Array.from(this.cues.positions)
|
||||
// cueArray.sort()
|
||||
const cueArray = Array.from(this.cues.positions)
|
||||
cueArray.sort((a, b) => a - b)
|
||||
|
||||
const closestCue = cueArray.find(i => i >= offset)
|
||||
|
||||
if (closestCue != null) {
|
||||
// prepare to skip file stream until we hit a cue position
|
||||
this.skip = closestCue - offset
|
||||
// set internal decoder position to output consistent file offsets
|
||||
this.decoder.total = closestCue
|
||||
|
||||
// console.log('using cue:', closestCue)
|
||||
|
||||
this.decoder.on('data', _onMetaData.bind(this))
|
||||
} else {
|
||||
this.decoder = null
|
||||
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
|
||||
}
|
||||
} else {
|
||||
if (offset) {
|
||||
this.decoder = null
|
||||
console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
|
||||
return
|
||||
}
|
||||
|
||||
this.subtitleTracks = new Map()
|
||||
this.timecodeScale = 1
|
||||
|
||||
this.decoder.on('data', _onMetaData.bind(this))
|
||||
}
|
||||
|
||||
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
|
||||
// TODO: only record first segment?
|
||||
// TODO: find a simpler way to do this
|
||||
waitForNext = true
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'SeekID') {
|
||||
// TODO: .value is undefined for some reason?
|
||||
currentSeekID = chunk[1].data
|
||||
}
|
||||
|
||||
if (currentSeekID && chunk[1].name === 'SeekPosition') {
|
||||
if (CUES_ID.equals(currentSeekID)) {
|
||||
// hack: this is not a cue position, but the position to the cue data itself,
|
||||
// in case it's not located at the beginning of the file.
|
||||
|
||||
function _onMetaData (chunk) {
|
||||
if (waitForNext) {
|
||||
waitForNext = false
|
||||
// Keep cues if this is the same segment
|
||||
if (!this.cues || this.cues.start !== chunk[1].start) {
|
||||
this.cues = { start: chunk[1].start, positions: new Set() }
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
|
||||
// TODO: only record first segment?
|
||||
// TODO: find a simpler way to do this
|
||||
waitForNext = true
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'SeekID') {
|
||||
// TODO: .value is undefined for some reason?
|
||||
currentSeekID = chunk[1].data
|
||||
}
|
||||
|
||||
if (currentSeekID && chunk[1].name === 'SeekPosition') {
|
||||
if (CUES_ID.equals(currentSeekID)) {
|
||||
// hack: this is not a cue position, but the position to the cue data itself,
|
||||
// in case it's not located at the beginning of the file.
|
||||
this.cues.positions.add(this.cues.start + chunk[1].value)
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'CueClusterPosition') {
|
||||
this.cues.positions.add(this.cues.start + chunk[1].value)
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'CueClusterPosition') {
|
||||
this.cues.positions.add(this.cues.start + chunk[1].value)
|
||||
}
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
|
||||
this.emit('cues')
|
||||
}
|
||||
|
||||
// Segment Information
|
||||
if (chunk[1].name === 'TimecodeScale') {
|
||||
this.timecodeScale = readElement(chunk[1]) / 1000000
|
||||
}
|
||||
|
||||
// Tracks
|
||||
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
|
||||
currentTrack = {}
|
||||
}
|
||||
|
||||
if (currentTrack && chunk[0] === 'tag') {
|
||||
// save info about track currently being scanned
|
||||
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
|
||||
currentTrack[chunk[1].name] = readElement(chunk[1])
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
|
||||
this.emit('cues')
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
|
||||
if (currentTrack.TrackType === 0x11) { // Subtitle Track
|
||||
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
|
||||
const track = {
|
||||
number: currentTrack.TrackNumber,
|
||||
language: currentTrack.Language,
|
||||
type: currentTrack.CodecID.substring(7).toLowerCase()
|
||||
}
|
||||
|
||||
if (currentTrack.CodecPrivate) {
|
||||
// only SSA/ASS
|
||||
track.header = currentTrack.CodecPrivate.toString('utf8')
|
||||
}
|
||||
|
||||
this.subtitleTracks.set(currentTrack.TrackNumber, track)
|
||||
|
||||
// Segment Information
|
||||
if (chunk[1].name === 'TimecodeScale') {
|
||||
this.timecodeScale = readElement(chunk[1]) / 1000000
|
||||
}
|
||||
|
||||
// Tracks
|
||||
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
|
||||
currentTrack = {}
|
||||
}
|
||||
|
||||
if (currentTrack && chunk[0] === 'tag') {
|
||||
// save info about track currently being scanned
|
||||
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
|
||||
currentTrack[chunk[1].name] = readElement(chunk[1])
|
||||
}
|
||||
}
|
||||
currentTrack = null
|
||||
}
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
|
||||
// this.decoder.removeListener('data', _onMetaData)
|
||||
|
||||
// if (this.subtitleTracks.size <= 0) return this.end()
|
||||
|
||||
// this.decoder.on('data', _onClusterData)
|
||||
this.emit('tracks', Array.from(this.subtitleTracks.values()))
|
||||
}
|
||||
// }
|
||||
|
||||
// function _onClusterData (chunk) {
|
||||
// TODO: assuming this is a Cluster `Timecode`
|
||||
if (chunk[1].name === 'Timecode') {
|
||||
currentClusterTimecode = readElement(chunk[1])
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'Block') {
|
||||
const block = ebmlBlock(chunk[1].data)
|
||||
|
||||
if (this.subtitleTracks.has(block.trackNumber)) {
|
||||
const type = this.subtitleTracks.get(block.trackNumber).type
|
||||
|
||||
const subtitle = {
|
||||
text: block.frames[0].toString('utf8'),
|
||||
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
|
||||
}
|
||||
|
||||
if (type === 'ass' || type === 'ssa') {
|
||||
// extract SSA/ASS keys
|
||||
const values = subtitle.text.split(',')
|
||||
// ignore read-order, and skip layer if ssa
|
||||
let i = type === 'ssa' ? 2 : 1
|
||||
for (; i < 9; i++) {
|
||||
subtitle[ASS_KEYS[i]] = values[i]
|
||||
}
|
||||
// re-append extra text that might have been split
|
||||
for (i = 9; i < values.length; i++) {
|
||||
subtitle.text += ',' + values[i]
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
|
||||
if (currentTrack.TrackType === 0x11) { // Subtitle Track
|
||||
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
|
||||
const track = {
|
||||
number: currentTrack.TrackNumber,
|
||||
language: currentTrack.Language,
|
||||
type: currentTrack.CodecID.substring(7).toLowerCase()
|
||||
}
|
||||
|
||||
if (currentTrack.CodecPrivate) {
|
||||
// only SSA/ASS
|
||||
track.header = currentTrack.CodecPrivate.toString('utf8')
|
||||
}
|
||||
|
||||
this.subtitleTracks.set(currentTrack.TrackNumber, track)
|
||||
}
|
||||
}
|
||||
|
||||
currentSubtitleBlock = [subtitle, block.trackNumber]
|
||||
currentTrack = null
|
||||
}
|
||||
|
||||
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
|
||||
// this.decoder.removeListener('data', _onMetaData)
|
||||
|
||||
// if (this.subtitleTracks.size <= 0) return this.end()
|
||||
|
||||
// this.decoder.on('data', _onClusterData)
|
||||
this.emit('tracks', Array.from(this.subtitleTracks.values()))
|
||||
}
|
||||
// }
|
||||
|
||||
// function _onClusterData (chunk) {
|
||||
// TODO: assuming this is a Cluster `Timecode`
|
||||
if (chunk[1].name === 'Timecode') {
|
||||
currentClusterTimecode = readElement(chunk[1])
|
||||
}
|
||||
|
||||
if (chunk[1].name === 'Block') {
|
||||
const block = ebmlBlock(chunk[1].data)
|
||||
|
||||
if (this.subtitleTracks.has(block.trackNumber)) {
|
||||
const type = this.subtitleTracks.get(block.trackNumber).type
|
||||
|
||||
const subtitle = {
|
||||
text: block.frames[0].toString('utf8'),
|
||||
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
|
||||
}
|
||||
|
||||
if (type === 'ass' || type === 'ssa') {
|
||||
// extract SSA/ASS keys
|
||||
const values = subtitle.text.split(',')
|
||||
// ignore read-order, and skip layer if ssa
|
||||
let i = type === 'ssa' ? 2 : 1
|
||||
for (; i < 9; i++) {
|
||||
subtitle[ASS_KEYS[i]] = values[i]
|
||||
}
|
||||
// re-append extra text that might have been split
|
||||
for (i = 9; i < values.length; i++) {
|
||||
subtitle.text += ',' + values[i]
|
||||
}
|
||||
}
|
||||
|
||||
currentSubtitleBlock = [subtitle, block.trackNumber]
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: assuming `BlockDuration` exists and always comes after `Block`
|
||||
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
|
||||
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
|
||||
|
||||
this.emit('subtitle', ...currentSubtitleBlock)
|
||||
|
||||
currentSubtitleBlock = null
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: assuming `BlockDuration` exists and always comes after `Block`
|
||||
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
|
||||
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
|
||||
|
||||
this.emit('subtitle', ...currentSubtitleBlock)
|
||||
|
||||
currentSubtitleBlock = null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_transform (chunk, _, callback) {
|
||||
if (this.skip) {
|
||||
// skip bytes to reach cue position
|
||||
if (this.skip < chunk.length) {
|
||||
// slice chunk
|
||||
const sc = chunk.slice(this.skip)
|
||||
this.skip = 0
|
||||
this.decoder.write(sc)
|
||||
|
||||
_transform (chunk, _, callback) {
|
||||
if (!this.decoder) return callback(null, chunk)
|
||||
|
||||
if (this.skip) {
|
||||
// skip bytes to reach cue position
|
||||
if (this.skip < chunk.length) {
|
||||
// slice chunk
|
||||
const sc = chunk.slice(this.skip)
|
||||
this.skip = 0
|
||||
this.decoder.write(sc)
|
||||
} else {
|
||||
// skip entire chunk
|
||||
this.skip -= chunk.length
|
||||
}
|
||||
} else {
|
||||
// skip entire chunk
|
||||
this.skip -= chunk.length
|
||||
this.decoder.write(chunk)
|
||||
}
|
||||
} else {
|
||||
this.decoder.write(chunk)
|
||||
|
||||
callback(null, chunk)
|
||||
}
|
||||
|
||||
callback(null, chunk)
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = MatroskaSubtitles
|
||||
|
||||
module.exports = MatroskaSubtitles
|
||||
|
||||
}).call(this,require("buffer").Buffer)
|
||||
},{"./lib/read-element":15,"buffer":3,"ebml":12,"ebml-block":9,"readable-stream":31}],15:[function(require,module,exports){
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ video[src=""] {
|
|||
}
|
||||
|
||||
video::cue {
|
||||
background: none;
|
||||
font-family: "Open Sans", sans-serif;
|
||||
color: #fff;
|
||||
text-shadow: 2px 2px 0 #000,
|
||||
|
|
@ -100,7 +101,6 @@ video::cue {
|
|||
-2px 0px 0 #000,
|
||||
0px -2px 0 #000,
|
||||
2px 2px 2px #000;
|
||||
background: none;
|
||||
}
|
||||
|
||||
.controls {
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ function addTorrent(magnet) {
|
|||
alertType: "alert-success",
|
||||
fillType: ""
|
||||
});
|
||||
finishThumbnails();
|
||||
// finishThumbnails(); //disabled for performance and testing reasons
|
||||
})
|
||||
torrent.on('noPeers', function () {
|
||||
halfmoon.initStickyAlert({
|
||||
|
|
@ -130,6 +130,7 @@ function serveFile(file, req) {
|
|||
|
||||
res.headers['Cache-Control'] = 'no-store'
|
||||
res.body = req.method === 'HEAD' ? '' : 'stream'
|
||||
console.log('set parser', range)
|
||||
|
||||
parser = new MatroskaSubtitles({ prevInstance: parser, offset: range.start })
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue