subtitle parsing progress, still a bit buggy

2026-05-19 06:01:47 +00:00 · 2020-09-15 01:57:55 +02:00 · 2020-09-15 01:57:55 +02:00 · 792021bcb4
commit 792021bcb4
parent a868626af1
5 changed files with 229 additions and 211 deletions
--- a/app/animeHandler.js
+++ b/app/animeHandler.js
@@ -306,4 +306,4 @@ async function rssFetch(url) {

 alRequest()

-search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
+// search = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
--- a/app/app.html
+++ b/app/app.html
@@ -138,7 +138,7 @@
        <div class="overflow-y-hidden content-wrapper">
            <section id="player">
                <a href="#player" class="w-full h-full"></a>
-                <video id="video" class="w-full" autoPictureInPicture preload>
+                <video id="video" class="w-full" autoPictureInPicture>
                </video>
                <div class="player d-none flex-column justify-content-between w-full h-full">
                    <div class="h-full w-full d-flex flex-column justify-content-between" id="ptoggle">
--- a/app/bundle.js
+++ b/app/bundle.js
@@ -13068,229 +13068,246 @@ if (typeof Object.create === 'function') {

 },{}],14:[function(require,module,exports){
 (function (Buffer){
-const Transform = require('readable-stream').Transform
-const ebml = require('ebml')
-const ebmlBlock = require('ebml-block')
-const readElement = require('./lib/read-element')
-
-// track elements we care about
-const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
-const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
-const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
-
-const CUES_ID = Buffer.from('1C53BB6B', 'hex')
-
-class MatroskaSubtitles extends Transform {
-  constructor ({ prevInstance, offset } = {}) {
-    super()
-
-    let currentTrack = null
-    let currentSubtitleBlock = null
-    let currentClusterTimecode = null
-
-    let currentSeekID = null
-
-    this.decoder = new ebml.Decoder()
-
-    if (prevInstance instanceof MatroskaSubtitles) {
-      if (offset == null) throw new Error('no offset')
-
-      prevInstance.once('drain', () => {
-        // prevInstance.end()
-        // console.log('prevInstance drain')
-      })
-
-      // copy previous metadata
-      this.subtitleTracks = prevInstance.subtitleTracks
-      this.timecodeScale = prevInstance.timecodeScale
-      this.cues = prevInstance.cues
-
-      if (!this.cues) {
-        return console.warn('No cues.')
-      }
-
-      // find a cue that's close to the file offset
-      // const cueArray = Uint32Array.from(this.cues.positions)
-      // cueArray.sort()
-      const cueArray = Array.from(this.cues.positions)
-      cueArray.sort((a, b) => a - b)
-
-      const closestCue = cueArray.find(i => i >= offset)
-
-      if (closestCue != null) {
-        // prepare to skip file stream until we hit a cue position
-        this.skip = closestCue - offset
-        // set internal decoder position to output consistent file offsets
-        this.decoder.total = closestCue
-
-        // console.log('using cue:', closestCue)
-
-        this.decoder.on('data', _onMetaData.bind(this))
-      } else {
-        console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
-      }
-    } else {
-      if (offset) return console.error(`Offset is ${offset}, and must be 0 for initial instance!`)
-
-      this.subtitleTracks = new Map()
-      this.timecodeScale = 1
-
-      this.decoder.on('data', _onMetaData.bind(this))
-    }
-
-    let waitForNext = false
-
-    function _onMetaData (chunk) {
-      if (waitForNext) {
-        waitForNext = false
-        // Keep cues if this is the same segment
-        if (!this.cues || this.cues.start !== chunk[1].start) {
-          // Add 0 as a valid cue point
-          this.cues = { start: chunk[1].start, positions: new Set([0]) }
+  const Transform = require('readable-stream').Transform
+  const ebml = require('ebml')
+  const ebmlBlock = require('ebml-block')
+  const readElement = require('./lib/read-element')
+  
+  // track elements we care about
+  const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
+  const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
+  const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
+  
+  const CUES_ID = Buffer.from('1C53BB6B', 'hex')
+  
+  class MatroskaSubtitles extends Transform {
+    constructor ({ prevInstance, offset } = {}) {
+      super()
+  
+      let currentTrack = null
+      let currentSubtitleBlock = null
+      let currentClusterTimecode = null
+  
+      let currentSeekID = null
+  
+      let waitForNext = false
+  
+      this.decoder = new ebml.Decoder()
+  
+      if (prevInstance instanceof MatroskaSubtitles) {
+        if (offset == null) throw new Error('no offset')
+  
+        prevInstance.once('drain', () => {
+          // prevInstance.end()
+          console.log('prevInstance drained')
+        })
+  
+        if (offset === 0) {
+          // just begin normal parsing
+          this.subtitleTracks = prevInstance.subtitleTracks || new Map()
+          this.timecodeScale = prevInstance.timecodeScale || 1
+          this.cues = prevInstance.cues
+  
+          this.decoder.on('data', _onMetaData.bind(this))
+          return
        }
+  
+        // copy previous metadata
+        this.subtitleTracks = prevInstance.subtitleTracks
+        this.timecodeScale = prevInstance.timecodeScale
+        this.cues = prevInstance.cues
+  
+        if (!this.cues) {
+          this.decoder = null
+          return console.warn('No cues was parsed. Subtitle parsing disabled.')
+        }
+  
+        // find a cue that's close to the file offset
+        // const cueArray = Uint32Array.from(this.cues.positions)
+        // cueArray.sort()
+        const cueArray = Array.from(this.cues.positions)
+        cueArray.sort((a, b) => a - b)
+  
+        const closestCue = cueArray.find(i => i >= offset)
+  
+        if (closestCue != null) {
+          // prepare to skip file stream until we hit a cue position
+          this.skip = closestCue - offset
+          // set internal decoder position to output consistent file offsets
+          this.decoder.total = closestCue
+  
+          // console.log('using cue:', closestCue)
+  
+          this.decoder.on('data', _onMetaData.bind(this))
+        } else {
+          this.decoder = null
+          console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
+        }
+      } else {
+        if (offset) {
+          this.decoder = null
+          console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
+          return
+        }
+  
+        this.subtitleTracks = new Map()
+        this.timecodeScale = 1
+  
+        this.decoder.on('data', _onMetaData.bind(this))
      }
-
-      if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
-        // TODO: only record first segment?
-        // TODO: find a simpler way to do this
-        waitForNext = true
-      }
-
-      if (chunk[1].name === 'SeekID') {
-        // TODO: .value is undefined for some reason?
-        currentSeekID = chunk[1].data
-      }
-
-      if (currentSeekID && chunk[1].name === 'SeekPosition') {
-        if (CUES_ID.equals(currentSeekID)) {
-          // hack: this is not a cue position, but the position to the cue data itself,
-          //       in case it's not located at the beginning of the file.
+  
+      function _onMetaData (chunk) {
+        if (waitForNext) {
+          waitForNext = false
+          // Keep cues if this is the same segment
+          if (!this.cues || this.cues.start !== chunk[1].start) {
+            this.cues = { start: chunk[1].start, positions: new Set() }
+          }
+        }
+  
+        if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
+          // TODO: only record first segment?
+          // TODO: find a simpler way to do this
+          waitForNext = true
+        }
+  
+        if (chunk[1].name === 'SeekID') {
+          // TODO: .value is undefined for some reason?
+          currentSeekID = chunk[1].data
+        }
+  
+        if (currentSeekID && chunk[1].name === 'SeekPosition') {
+          if (CUES_ID.equals(currentSeekID)) {
+            // hack: this is not a cue position, but the position to the cue data itself,
+            //       in case it's not located at the beginning of the file.
+            this.cues.positions.add(this.cues.start + chunk[1].value)
+          }
+        }
+  
+        if (chunk[1].name === 'CueClusterPosition') {
          this.cues.positions.add(this.cues.start + chunk[1].value)
        }
-      }
-
-      if (chunk[1].name === 'CueClusterPosition') {
-        this.cues.positions.add(this.cues.start + chunk[1].value)
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
-        this.emit('cues')
-      }
-
-      // Segment Information
-      if (chunk[1].name === 'TimecodeScale') {
-        this.timecodeScale = readElement(chunk[1]) / 1000000
-      }
-
-      // Tracks
-      if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
-        currentTrack = {}
-      }
-
-      if (currentTrack && chunk[0] === 'tag') {
-        // save info about track currently being scanned
-        if (TRACK_ELEMENTS.includes(chunk[1].name)) {
-          currentTrack[chunk[1].name] = readElement(chunk[1])
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
+          this.emit('cues')
        }
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
-        if (currentTrack.TrackType === 0x11) { // Subtitle Track
-          if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
-            const track = {
-              number: currentTrack.TrackNumber,
-              language: currentTrack.Language,
-              type: currentTrack.CodecID.substring(7).toLowerCase()
-            }
-
-            if (currentTrack.CodecPrivate) {
-              // only SSA/ASS
-              track.header = currentTrack.CodecPrivate.toString('utf8')
-            }
-
-            this.subtitleTracks.set(currentTrack.TrackNumber, track)
+  
+        // Segment Information
+        if (chunk[1].name === 'TimecodeScale') {
+          this.timecodeScale = readElement(chunk[1]) / 1000000
+        }
+  
+        // Tracks
+        if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
+          currentTrack = {}
+        }
+  
+        if (currentTrack && chunk[0] === 'tag') {
+          // save info about track currently being scanned
+          if (TRACK_ELEMENTS.includes(chunk[1].name)) {
+            currentTrack[chunk[1].name] = readElement(chunk[1])
          }
        }
-        currentTrack = null
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
-        // this.decoder.removeListener('data', _onMetaData)
-
-        // if (this.subtitleTracks.size <= 0) return this.end()
-
-        // this.decoder.on('data', _onClusterData)
-        this.emit('tracks', Array.from(this.subtitleTracks.values()))
-      }
-      // }
-
-      // function _onClusterData (chunk) {
-      // TODO: assuming this is a Cluster `Timecode`
-      if (chunk[1].name === 'Timecode') {
-        currentClusterTimecode = readElement(chunk[1])
-      }
-
-      if (chunk[1].name === 'Block') {
-        const block = ebmlBlock(chunk[1].data)
-
-        if (this.subtitleTracks.has(block.trackNumber)) {
-          const type = this.subtitleTracks.get(block.trackNumber).type
-
-          const subtitle = {
-            text: block.frames[0].toString('utf8'),
-            time: (block.timecode + currentClusterTimecode) * this.timecodeScale
-          }
-
-          if (type === 'ass' || type === 'ssa') {
-            // extract SSA/ASS keys
-            const values = subtitle.text.split(',')
-            // ignore read-order, and skip layer if ssa
-            let i = type === 'ssa' ? 2 : 1
-            for (; i < 9; i++) {
-              subtitle[ASS_KEYS[i]] = values[i]
-            }
-            // re-append extra text that might have been split
-            for (i = 9; i < values.length; i++) {
-              subtitle.text += ',' + values[i]
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
+          if (currentTrack.TrackType === 0x11) { // Subtitle Track
+            if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
+              const track = {
+                number: currentTrack.TrackNumber,
+                language: currentTrack.Language,
+                type: currentTrack.CodecID.substring(7).toLowerCase()
+              }
+  
+              if (currentTrack.CodecPrivate) {
+                // only SSA/ASS
+                track.header = currentTrack.CodecPrivate.toString('utf8')
+              }
+  
+              this.subtitleTracks.set(currentTrack.TrackNumber, track)
            }
          }
-
-          currentSubtitleBlock = [subtitle, block.trackNumber]
+          currentTrack = null
+        }
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
+          // this.decoder.removeListener('data', _onMetaData)
+  
+          // if (this.subtitleTracks.size <= 0) return this.end()
+  
+          // this.decoder.on('data', _onClusterData)
+          this.emit('tracks', Array.from(this.subtitleTracks.values()))
+        }
+        // }
+  
+        // function _onClusterData (chunk) {
+        // TODO: assuming this is a Cluster `Timecode`
+        if (chunk[1].name === 'Timecode') {
+          currentClusterTimecode = readElement(chunk[1])
+        }
+  
+        if (chunk[1].name === 'Block') {
+          const block = ebmlBlock(chunk[1].data)
+  
+          if (this.subtitleTracks.has(block.trackNumber)) {
+            const type = this.subtitleTracks.get(block.trackNumber).type
+  
+            const subtitle = {
+              text: block.frames[0].toString('utf8'),
+              time: (block.timecode + currentClusterTimecode) * this.timecodeScale
+            }
+  
+            if (type === 'ass' || type === 'ssa') {
+              // extract SSA/ASS keys
+              const values = subtitle.text.split(',')
+              // ignore read-order, and skip layer if ssa
+              let i = type === 'ssa' ? 2 : 1
+              for (; i < 9; i++) {
+                subtitle[ASS_KEYS[i]] = values[i]
+              }
+              // re-append extra text that might have been split
+              for (i = 9; i < values.length; i++) {
+                subtitle.text += ',' + values[i]
+              }
+            }
+  
+            currentSubtitleBlock = [subtitle, block.trackNumber]
+          }
+        }
+  
+        // TODO: assuming `BlockDuration` exists and always comes after `Block`
+        if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
+          currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
+  
+          this.emit('subtitle', ...currentSubtitleBlock)
+  
+          currentSubtitleBlock = null
        }
-      }
-
-      // TODO: assuming `BlockDuration` exists and always comes after `Block`
-      if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
-        currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
-
-        this.emit('subtitle', ...currentSubtitleBlock)
-
-        currentSubtitleBlock = null
      }
    }
-  }
-
-  _transform (chunk, _, callback) {
-    if (this.skip) {
-      // skip bytes to reach cue position
-      if (this.skip < chunk.length) {
-        // slice chunk
-        const sc = chunk.slice(this.skip)
-        this.skip = 0
-        this.decoder.write(sc)
+  
+    _transform (chunk, _, callback) {
+      if (!this.decoder) return callback(null, chunk)
+  
+      if (this.skip) {
+        // skip bytes to reach cue position
+        if (this.skip < chunk.length) {
+          // slice chunk
+          const sc = chunk.slice(this.skip)
+          this.skip = 0
+          this.decoder.write(sc)
+        } else {
+          // skip entire chunk
+          this.skip -= chunk.length
+        }
      } else {
-        // skip entire chunk
-        this.skip -= chunk.length
+        this.decoder.write(chunk)
      }
-    } else {
-      this.decoder.write(chunk)
+  
+      callback(null, chunk)
    }
-
-    callback(null, chunk)
  }
-}
-
-module.exports = MatroskaSubtitles
+  
+  module.exports = MatroskaSubtitles

 }).call(this,require("buffer").Buffer)
 },{"./lib/read-element":15,"buffer":3,"ebml":12,"ebml-block":9,"readable-stream":31}],15:[function(require,module,exports){
--- a/app/css2.css
+++ b/app/css2.css
@@ -89,6 +89,7 @@ video[src=""] {
 }

 video::cue {
+    background: none;
    font-family: "Open Sans", sans-serif;
    color: #fff;
    text-shadow: 2px 2px 0 #000,
@@ -100,7 +101,6 @@ video::cue {
        -2px 0px 0 #000,
        0px -2px 0 #000,
        2px 2px 2px #000;
-    background: none;
 }

 .controls {
--- a/app/torrentHandler.js
+++ b/app/torrentHandler.js
@@ -74,7 +74,7 @@ function addTorrent(magnet) {
                alertType: "alert-success",
                fillType: ""
            });
-            finishThumbnails();
+            // finishThumbnails(); //disabled for performance and testing reasons
        })
        torrent.on('noPeers', function () {
            halfmoon.initStickyAlert({
@@ -130,6 +130,7 @@ function serveFile(file, req) {

    res.headers['Cache-Control'] = 'no-store'
    res.body = req.method === 'HEAD' ? '' : 'stream'
+    console.log('set parser', range)

    parser = new MatroskaSubtitles({ prevInstance: parser, offset: range.start })