Subtitle parsing done (testing needed); releases work; minor changes

2026-04-20 14:12:06 +00:00 · 2020-09-16 00:06:06 +02:00 · 2020-09-16 00:06:06 +02:00 · ece50f1299
commit ece50f1299
parent 95fc4ccc71
4 changed files with 296 additions and 250 deletions
--- a/app/app.html
+++ b/app/app.html
@ -104,19 +104,19 @@
                </a>
                <div class="sidebar-divider"></div>
                <a href="#releases" class="sidebar-link sidebar-link-with-icon">
-                    <span class="sidebar-icon">
+                    <span class="sidebar-icon bg-transparent justify-content-start mr-0">
                        <i class="material-icons" aria-hidden="true">schedule</i>
                    </span>
                    Latest Releases
                </a>
                <a href="#browse" class="sidebar-link sidebar-link-with-icon">
-                    <span class="sidebar-icon">
+                    <span class="sidebar-icon bg-transparent justify-content-start mr-0">
                        <i class="material-icons" aria-hidden="true">list</i>
                    </span>
                    Browse
                </a>
                <a href="#settings" class="sidebar-link sidebar-link-with-icon mt-auto">
-                    <span class="sidebar-icon">
+                    <span class="sidebar-icon bg-transparent justify-content-start mr-0">
                        <i class="material-icons" aria-hidden="true">settings</i>
                    </span>
                    Settings
--- a/app/js/animeHandler.js
+++ b/app/js/animeHandler.js
@ -122,21 +122,20 @@ async function alRequest(a, b) {
    })

    let res = await fetch(url, options).catch((error) => console.error(error)),
-    json = await res.json();
+        json = await res.json();
    return json
 }
-
+let alResponse
 async function searchAnime(a) {
-    let request = await alRequest(a)
-    console.log(request);
+    alResponse = await alRequest(a)
+    console.log(alResponse);
    let frag = document.createDocumentFragment()
    document.querySelector(".browse").textContent = '';
    try {
-        request.data.Page.media.forEach((media, index) => {
+        alResponse.data.Page.media.forEach(media => {
            let template = document.createElement("div")
            template.classList.add("card", "m-0", "p-0")
            template.innerHTML = `
-
            <div class="row h-full">
                <div class="col-4">
                    <img src="${media.coverImage.extraLarge}"
@ -160,10 +159,9 @@ async function searchAnime(a) {
                    </div>
                </div>
            </div>
-
            `
            template.onclick = function () {
-                viewAnime(request.data.Page.media[index])
+                viewAnime(media)
            }
            frag.appendChild(template)
        })
@ -294,4 +292,21 @@ async function nyaaRss(url) {
 }


-// rx = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
+let regex = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/,
+    str = `[HorribleSubs] Black Clover - 143 [1080p].mkv`,
+    m,
+    store = {};
+function regtest() {
+    if ((m = regex.exec(str)) !== null) {
+        if (m[2].endsWith(" - ")) {
+            m[2] = m[2].slice(0, -3)
+        }
+        if (!store[m[2]] && !alResponse.data.Page.media.some(media => (Object.values(media.title).concat(media.synonyms).filter(name => name != null).includes(m[2]) && ((store[m[2]] = media) && true)))) {
+            //shit not found, lookup
+        }
+
+        m.forEach((match, groupIndex) => {
+            console.log(`Found match, group ${groupIndex}: ${match}`);
+        });
+    }
+}
--- a/app/js/bundle.js
+++ b/app/js/bundle.js
@ -13069,251 +13069,283 @@ if (typeof Object.create === 'function') {
 },{}],14:[function(require,module,exports){
 (function (Buffer){
  const Transform = require('readable-stream').Transform
-const ebml = require('ebml')
-const ebmlBlock = require('ebml-block')
-const readElement = require('./lib/read-element')
-
-// track elements we care about
-const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
-const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
-const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
-
-const CUES_ID = Buffer.from('1C53BB6B', 'hex')
-
-class MatroskaSubtitles extends Transform {
-  constructor ({ prevInstance, offset } = {}) {
-    super()
-
-    let currentTrack = null
-    let currentSubtitleBlock = null
-    let currentClusterTimecode = null
-
-    let currentSeekID = null
-
-    let waitForNext = false
-
-    this.decoder = new ebml.Decoder()
-
-    if (prevInstance instanceof MatroskaSubtitles) {
-      if (offset == null) throw new Error('no offset')
-
-      prevInstance.once('drain', () => {
-        // prevInstance.end()
-        console.log('prevInstance drained')
-      })
-
-      if (offset === 0) {
-        // just begin normal parsing
-        this.subtitleTracks = prevInstance.subtitleTracks || new Map()
-        this.timecodeScale = prevInstance.timecodeScale || 1
+  const ebml = require('ebml')
+  const ebmlBlock = require('ebml-block')
+  const readElement = require('./lib/read-element')
+  
+  // track elements we care about
+  const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
+  const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
+  const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
+  
+  // const CUES_ID = Buffer.from('1C53BB6B', 'hex')
+  
+  class MatroskaSubtitles extends Transform {
+    constructor ({ prevInstance, offset } = {}) {
+      super()
+  
+      this.id = (Math.random() * 10000) | 0
+      this.offset = offset
+      this.bcount = 0
+  
+      let currentTrack = null
+      let currentSubtitleBlock = null
+      let currentClusterTimecode = null
+  
+      let currentSeekID = null
+  
+      let waitForNext = false
+  
+      this.on('close', () => {
+        console.log('CLOSED:', this.id)
+      });
+  
+      this.on('finish', () => {
+        console.log('FINISH:', this.id)
+      });
+  
+      this.decoder = new ebml.Decoder()
+  
+      if (prevInstance instanceof MatroskaSubtitles) {
+        if (offset == null) throw new Error('no offset')
+  
+        if (prevInstance.decoder) console.log(`prevInstance id=${prevInstance.id}: decoder t=${prevInstance.decoder.total}, c=${prevInstance.decoder.cursor}`)
+  
+        prevInstance.once('drain', () => {
+          console.log(`prevInstancee id=${prevInstance.id}: drained`)
+          // prevInstance.end()
+        })
+  
+        if (offset === 0) {
+          // just begin normal parsing
+          this.subtitleTracks = prevInstance.subtitleTracks || new Map()
+          this.timecodeScale = prevInstance.timecodeScale || 1
+          this.cues = prevInstance.cues
+  
+          this.decoder.on('data', _onMetaData.bind(this))
+          return
+        }
+  
+        // copy previous metadata
+        this.subtitleTracks = prevInstance.subtitleTracks
+        this.timecodeScale = prevInstance.timecodeScale
        this.cues = prevInstance.cues
-
-        this.decoder.on('data', _onMetaData.bind(this))
-        return
-      }
-
-      // copy previous metadata
-      this.subtitleTracks = prevInstance.subtitleTracks
-      this.timecodeScale = prevInstance.timecodeScale
-      this.cues = prevInstance.cues
-
-      if (!this.cues) {
-        this.decoder = null
-        return console.warn('No cues was parsed. Subtitle parsing disabled.')
-      }
-
-      // find a cue that's close to the file offset
-      // const cueArray = Uint32Array.from(this.cues.positions)
-      // cueArray.sort()
-      const cueArray = Array.from(this.cues.positions)
-      cueArray.sort((a, b) => a - b)
-
-      const closestCue = cueArray.find(i => i >= offset)
-
-      if (closestCue != null) {
-        // prepare to skip file stream until we hit a cue position
-        this.skip = closestCue - offset
-        // set internal decoder position to output consistent file offsets
-        this.decoder.total = closestCue
-
-        // console.log('using cue:', closestCue)
-
-        this.decoder.on('data', _onMetaData.bind(this))
-      } else {
-        this.decoder = null
-        console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
-      }
-    } else {
-      if (offset) {
-        this.decoder = null
-        console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
-        return
-      }
-
-      this.subtitleTracks = new Map()
-      this.timecodeScale = 1
-
-      this.decoder.on('data', _onMetaData.bind(this))
-    }
-
-    function _onMetaData (chunk) {
-      if (waitForNext) {
-        waitForNext = false
-        // Keep cues if this is the same segment
+  
        if (!this.cues) {
-          this.cues = { start: chunk[1].start, positions: new Set() }
-        } else if (this.cues.start !== chunk[1].start) {
-          this.cues = { start: chunk[1].start, positions: new Set() }
-          console.warn('New segment found - resetting cues! Not sure we can handle this!?')
+          this.decoder = null
+          // TODO: AVOID THIS! - we can actually add seek points without the segment start (decoder.total), but this would cause problems later
+          return console.warn('No cues was parsed. Subtitle parsing disabled.')
+        }
+  
+        // TODO: should always be the case, but we currently allow some slack (initial instance not at z=0)
+        if (prevInstance.decoder) {
+          // use the position of the previous decoder as a valid seek point
+          // this can help if offset is changed before parsing seeks and cues
+          const decoderPosition = prevInstance.decoder.total - prevInstance.decoder.cursor
+          this.cues.positions.add(decoderPosition)
+        }
+          
+        // find a cue that's close to the file offset
+        // const cueArray = Uint32Array.from(this.cues.positions)
+        // cueArray.sort()
+        const cueArray = Array.from(this.cues.positions)
+        cueArray.sort((a, b) => a - b)
+  
+        const closestCue = cueArray.find(i => i >= offset)
+  
+        if (closestCue != null) {
+          // prepare to skip file stream until we hit a cue position
+          this.skip = closestCue - offset
+          // set internal decoder position to output consistent file offsets
+          this.decoder.total = closestCue
+  
+          // console.log('using cue:', closestCue)
+  
+          this.decoder.on('data', _onMetaData.bind(this))
        } else {
-          console.info('Saw first segment again. Keeping cues.')
+          this.decoder = null
+          console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
        }
-      }
-
-      if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
-        // TODO: only record first segment?
-        // TODO: find a simpler way to do this
-        waitForNext = true
-      }
-
-      if (chunk[1].name === 'SeekID') {
-        // TODO: .value is undefined for some reason?
-        currentSeekID = chunk[1].data
-      }
-
-      if (currentSeekID && chunk[1].name === 'SeekPosition') {
-        //if (CUES_ID.equals(currentSeekID)) {
-          // hack: this is not a cue position, but the position to the cue data itself,
-          //       in case it's not located at the beginning of the file.
-          // actually, just add all seek positions.
-          this.cues.positions.add(this.cues.start + chunk[1].value)
-        //}
-      }
-
-      if (chunk[1].name === 'CueClusterPosition') {
-        this.cues.positions.add(this.cues.start + chunk[1].value)
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
-        this.emit('cues')
-      }
-
-      // Segment Information
-      if (chunk[1].name === 'TimecodeScale') {
-        this.timecodeScale = readElement(chunk[1]) / 1000000
-      }
-
-      // Tracks
-      if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
-        currentTrack = {}
-      }
-
-      if (currentTrack && chunk[0] === 'tag') {
-        // save info about track currently being scanned
-        if (TRACK_ELEMENTS.includes(chunk[1].name)) {
-          currentTrack[chunk[1].name] = readElement(chunk[1])
-        }
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
-        if (currentTrack.TrackType === 0x11) { // Subtitle Track
-          if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
-            const track = {
-              number: currentTrack.TrackNumber,
-              language: currentTrack.Language,
-              type: currentTrack.CodecID.substring(7).toLowerCase()
-            }
-
-            if (currentTrack.CodecPrivate) {
-              // only SSA/ASS
-              track.header = currentTrack.CodecPrivate.toString('utf8')
-            }
-
-            this.subtitleTracks.set(currentTrack.TrackNumber, track)
-          }
-        }
-        currentTrack = null
-      }
-
-      if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
-        // this.decoder.removeListener('data', _onMetaData)
-
-        // if (this.subtitleTracks.size <= 0) return this.end()
-
-        // this.decoder.on('data', _onClusterData)
-        this.emit('tracks', Array.from(this.subtitleTracks.values()))
-      }
-      // }
-
-      // function _onClusterData (chunk) {
-      // TODO: assuming this is a Cluster `Timecode`
-      if (chunk[1].name === 'Timecode') {
-        currentClusterTimecode = readElement(chunk[1])
-      }
-
-      if (chunk[1].name === 'Block') {
-        const block = ebmlBlock(chunk[1].data)
-
-        if (this.subtitleTracks.has(block.trackNumber)) {
-          const type = this.subtitleTracks.get(block.trackNumber).type
-
-          const subtitle = {
-            text: block.frames[0].toString('utf8'),
-            time: (block.timecode + currentClusterTimecode) * this.timecodeScale
-          }
-
-          if (type === 'ass' || type === 'ssa') {
-            // extract SSA/ASS keys
-            const values = subtitle.text.split(',')
-            // ignore read-order, and skip layer if ssa
-            let i = type === 'ssa' ? 2 : 1
-            for (; i < 9; i++) {
-              subtitle[ASS_KEYS[i]] = values[i]
-            }
-            // re-append extra text that might have been split
-            for (i = 9; i < values.length; i++) {
-              subtitle.text += ',' + values[i]
-            }
-          }
-
-          currentSubtitleBlock = [subtitle, block.trackNumber]
-        }
-      }
-
-      // TODO: assuming `BlockDuration` exists and always comes after `Block`
-      if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
-        currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
-
-        this.emit('subtitle', ...currentSubtitleBlock)
-
-        currentSubtitleBlock = null
-      }
-    }
-  }
-
-  _transform (chunk, _, callback) {
-    if (!this.decoder) return callback(null, chunk)
-
-    if (this.skip) {
-      // skip bytes to reach cue position
-      if (this.skip < chunk.length) {
-        // slice chunk
-        const sc = chunk.slice(this.skip)
-        this.skip = 0
-        this.decoder.write(sc)
      } else {
-        // skip entire chunk
-        this.skip -= chunk.length
+        if (offset) {
+          this.decoder = null
+          console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
+          return
+        }
+  
+        this.subtitleTracks = new Map()
+        this.timecodeScale = 1
+  
+        this.decoder.on('data', _onMetaData.bind(this))
+      }
+  
+      function _onMetaData (chunk) {
+        if (waitForNext) {
+          waitForNext = false
+          // Keep cues if this is the same segment
+          if (!this.cues) {
+            this.cues = { start: chunk[1].start, positions: new Set() }
+          } else if (this.cues.start !== chunk[1].start) {
+            this.cues = { start: chunk[1].start, positions: new Set() }
+            console.warn('New segment found - resetting cues! Not sure we can handle this!?')
+          } else {
+            console.info('Saw first segment again. Keeping cues.')
+          }
+        }
+  
+        if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
+          // TODO: only record first segment?
+          // TODO: find a simpler way to do this
+          waitForNext = true
+        }
+  
+        if (chunk[1].name === 'SeekID') {
+          // TODO: .value is undefined for some reason?
+          currentSeekID = chunk[1].data
+        }
+  
+        if (currentSeekID && chunk[1].name === 'SeekPosition') {
+          //if (CUES_ID.equals(currentSeekID)) {
+            // hack: this is not a cue position, but the position to the cue data itself,
+            //       in case it's not located at the beginning of the file.
+            // actually, just add all seek positions.
+            this.cues.positions.add(this.cues.start + chunk[1].value)
+          //}
+        }
+  
+        if (chunk[1].name === 'CueClusterPosition') {
+          this.cues.positions.add(this.cues.start + chunk[1].value)
+        }
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
+          this.emit('cues')
+        }
+  
+        // Segment Information
+        if (chunk[1].name === 'TimecodeScale') {
+          this.timecodeScale = readElement(chunk[1]) / 1000000
+        }
+  
+        // Tracks
+        if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
+          currentTrack = {}
+        }
+  
+        if (currentTrack && chunk[0] === 'tag') {
+          // save info about track currently being scanned
+          if (TRACK_ELEMENTS.includes(chunk[1].name)) {
+            currentTrack[chunk[1].name] = readElement(chunk[1])
+          }
+        }
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
+          if (currentTrack.TrackType === 0x11) { // Subtitle Track
+            if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
+              const track = {
+                number: currentTrack.TrackNumber,
+                language: currentTrack.Language,
+                type: currentTrack.CodecID.substring(7).toLowerCase()
+              }
+  
+              if (currentTrack.CodecPrivate) {
+                // only SSA/ASS
+                track.header = currentTrack.CodecPrivate.toString('utf8')
+              }
+  
+              this.subtitleTracks.set(currentTrack.TrackNumber, track)
+            }
+          }
+          currentTrack = null
+        }
+  
+        if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
+          // this.decoder.removeListener('data', _onMetaData)
+  
+          // if (this.subtitleTracks.size <= 0) return this.end()
+  
+          // this.decoder.on('data', _onClusterData)
+          this.emit('tracks', Array.from(this.subtitleTracks.values()))
+        }
+        // }
+  
+        // function _onClusterData (chunk) {
+        // TODO: assuming this is a Cluster `Timecode`
+        if (chunk[1].name === 'Timecode') {
+          currentClusterTimecode = readElement(chunk[1])
+        }
+  
+        if (chunk[1].name === 'Block') {
+          const block = ebmlBlock(chunk[1].data)
+  
+          if (this.subtitleTracks.has(block.trackNumber)) {
+            const type = this.subtitleTracks.get(block.trackNumber).type
+  
+            const subtitle = {
+              text: block.frames[0].toString('utf8'),
+              time: (block.timecode + currentClusterTimecode) * this.timecodeScale
+            }
+  
+            if (type === 'ass' || type === 'ssa') {
+              // extract SSA/ASS keys
+              const values = subtitle.text.split(',')
+              // ignore read-order, and skip layer if ssa
+              let i = type === 'ssa' ? 2 : 1
+              for (; i < 9; i++) {
+                subtitle[ASS_KEYS[i]] = values[i]
+              }
+              // re-append extra text that might have been split
+              for (i = 9; i < values.length; i++) {
+                subtitle.text += ',' + values[i]
+              }
+            }
+  
+            currentSubtitleBlock = [subtitle, block.trackNumber]
+          }
+        }
+  
+        // TODO: assuming `BlockDuration` exists and always comes after `Block`
+        if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
+          currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
+  
+          this.emit('subtitle', ...currentSubtitleBlock)
+  
+          currentSubtitleBlock = null
+        }
      }
-    } else {
-      this.decoder.write(chunk)
    }
-
-    callback(null, chunk)
+  
+    _transform (chunk, _, callback) {
+      console.log(`Write id=${this.id}: z=${this.offset}, l=${chunk.length}, skip=${this.skip} pos=${(this.offset || 0) + this.bcount}`)
+      this.bcount += chunk.length
+  
+      if (!this.decoder) {
+        console.warn('Skipped decoder')
+        return callback(null, chunk)
+      }
+  
+      if (this.skip) {
+        if (this.skip > 1048576 * 20) {
+          console.warn(this.id, 'High skip value... This is bad.')
+        }
+        // skip bytes to reach cue position
+        if (this.skip < chunk.length) {
+          // slice chunk
+          const sc = chunk.slice(this.skip)
+          this.skip = 0
+          this.decoder.write(sc)
+        } else {
+          // skip entire chunk
+          this.skip -= chunk.length
+        }
+      } else {
+        this.decoder.write(chunk)
+      }
+  
+      callback(null, chunk)
+    }
  }
-}
-
-module.exports = MatroskaSubtitles
+  
+  module.exports = MatroskaSubtitles

 }).call(this,require("buffer").Buffer)
 },{"./lib/read-element":15,"buffer":3,"ebml":12,"ebml-block":9,"readable-stream":31}],15:[function(require,module,exports){
--- a/app/js/subtitleHandler.js
+++ b/app/js/subtitleHandler.js
@ -9,7 +9,6 @@ function parseSubs(range, stream) {
    parser = new MatroskaSubtitles({ prevInstance: parser, offset: range.start })

    parser.once('tracks', function (pTracks) {
-      console.log(pTracks)
      tracks = []
      pTracks.forEach(track => {
        tracks[track.number] = video.addTextTrack('captions', track.type, track.language || track.number)