subtitle parsing done testing needed, releases work, minor changes

This commit is contained in:
ThaUnknown 2020-09-16 00:06:06 +02:00
parent 95fc4ccc71
commit ece50f1299
4 changed files with 296 additions and 250 deletions

View file

@ -104,19 +104,19 @@
</a>
<div class="sidebar-divider"></div>
<a href="#releases" class="sidebar-link sidebar-link-with-icon">
<span class="sidebar-icon">
<span class="sidebar-icon bg-transparent justify-content-start mr-0">
<i class="material-icons" aria-hidden="true">schedule</i>
</span>
Latest Releases
</a>
<a href="#browse" class="sidebar-link sidebar-link-with-icon">
<span class="sidebar-icon">
<span class="sidebar-icon bg-transparent justify-content-start mr-0">
<i class="material-icons" aria-hidden="true">list</i>
</span>
Browse
</a>
<a href="#settings" class="sidebar-link sidebar-link-with-icon mt-auto">
<span class="sidebar-icon">
<span class="sidebar-icon bg-transparent justify-content-start mr-0">
<i class="material-icons" aria-hidden="true">settings</i>
</span>
Settings

View file

@ -122,21 +122,20 @@ async function alRequest(a, b) {
})
let res = await fetch(url, options).catch((error) => console.error(error)),
json = await res.json();
json = await res.json();
return json
}
let alResponse
async function searchAnime(a) {
let request = await alRequest(a)
console.log(request);
alResponse = await alRequest(a)
console.log(alResponse);
let frag = document.createDocumentFragment()
document.querySelector(".browse").textContent = '';
try {
request.data.Page.media.forEach((media, index) => {
alResponse.data.Page.media.forEach(media => {
let template = document.createElement("div")
template.classList.add("card", "m-0", "p-0")
template.innerHTML = `
<div class="row h-full">
<div class="col-4">
<img src="${media.coverImage.extraLarge}"
@ -160,10 +159,9 @@ async function searchAnime(a) {
</div>
</div>
</div>
`
template.onclick = function () {
viewAnime(request.data.Page.media[index])
viewAnime(media)
}
frag.appendChild(template)
})
@ -294,4 +292,21 @@ async function nyaaRss(url) {
}
// rx = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/
// Splits a release file name into four capture groups:
//   [1] leading bracket tags   e.g. "[HorribleSubs]"
//   [2] title                  e.g. "Black Clover - "
//   [3] season/episode marker  e.g. "143" or "S2 - 05"
//   [4] everything else        e.g. "[1080p].mkv"
// Every group is optional, so any of them can come back as `undefined`.
let regex = /((?:\[[^\]]*\])*)?\s*((?:[^\d\[\.](?!S\d))*)?\s*((?:S\d+[^\w\[]*E?)?[\d\-]*)\s*(.*)?/;
// Sample file name used while the parser is being tested.
let str = `[HorribleSubs] Black Clover - 143 [1080p].mkv`;
// Result of the most recent `regex.exec(str)`; kept at module level for inspection.
let m;
// Cache of parsed title -> media entry taken from the last search response.
let store = {};

/**
 * Parses `str` with `regex`, normalises the title group and tries to resolve
 * it to a media entry from the most recent search response (`alResponse`).
 *
 * Side effects:
 *  - assigns the match array to the module-level `m` (with `m[2]` stripped of
 *    a trailing " - " separator when present);
 *  - stores the resolved media entry in `store[title]` on a hit;
 *  - logs every capture group to the console.
 *
 * A file name without a title part, or a missing/partial `alResponse`, is
 * treated as "not found" instead of throwing.
 */
function regtest() {
  m = regex.exec(str);
  if (m === null) {
    return;
  }

  // Group 2 is `undefined` when the name has no title part (e.g. "143.mkv"),
  // so it must be type-checked before any string method is called on it.
  let title = m[2];
  if (typeof title === 'string' && title.endsWith(' - ')) {
    title = title.slice(0, -3);
    m[2] = title;
  }

  // Own-property check: a plain `store[title]` would report inherited keys
  // such as "constructor" as already cached.
  const isCached = Boolean(title) &&
    Object.prototype.hasOwnProperty.call(store, title) &&
    Boolean(store[title]);

  if (title && !isCached) {
    // `alResponse` stays undefined until the first search has completed.
    const mediaList = (typeof alResponse !== 'undefined' &&
      alResponse && alResponse.data && alResponse.data.Page &&
      alResponse.data.Page.media) || [];

    const found = mediaList.find((media) => {
      const names = Object.values(media.title || {})
        .concat(media.synonyms)
        .filter((name) => name != null);
      return names.includes(title);
    });

    if (found) {
      store[title] = found;
    } else {
      // TODO: title is neither cached nor in the last response - look it up.
    }
  }

  m.forEach((match, groupIndex) => {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  });
}

View file

@ -13069,251 +13069,283 @@ if (typeof Object.create === 'function') {
},{}],14:[function(require,module,exports){
(function (Buffer){
const Transform = require('readable-stream').Transform
const ebml = require('ebml')
const ebmlBlock = require('ebml-block')
const readElement = require('./lib/read-element')
// track elements we care about
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
const CUES_ID = Buffer.from('1C53BB6B', 'hex')
class MatroskaSubtitles extends Transform {
constructor ({ prevInstance, offset } = {}) {
super()
let currentTrack = null
let currentSubtitleBlock = null
let currentClusterTimecode = null
let currentSeekID = null
let waitForNext = false
this.decoder = new ebml.Decoder()
if (prevInstance instanceof MatroskaSubtitles) {
if (offset == null) throw new Error('no offset')
prevInstance.once('drain', () => {
// prevInstance.end()
console.log('prevInstance drained')
})
if (offset === 0) {
// just begin normal parsing
this.subtitleTracks = prevInstance.subtitleTracks || new Map()
this.timecodeScale = prevInstance.timecodeScale || 1
const ebml = require('ebml')
const ebmlBlock = require('ebml-block')
const readElement = require('./lib/read-element')
// track elements we care about
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']
// const CUES_ID = Buffer.from('1C53BB6B', 'hex')
class MatroskaSubtitles extends Transform {
constructor ({ prevInstance, offset } = {}) {
super()
this.id = (Math.random() * 10000) | 0
this.offset = offset
this.bcount = 0
let currentTrack = null
let currentSubtitleBlock = null
let currentClusterTimecode = null
let currentSeekID = null
let waitForNext = false
this.on('close', () => {
console.log('CLOSED:', this.id)
});
this.on('finish', () => {
console.log('FINISH:', this.id)
});
this.decoder = new ebml.Decoder()
if (prevInstance instanceof MatroskaSubtitles) {
if (offset == null) throw new Error('no offset')
if (prevInstance.decoder) console.log(`prevInstance id=${prevInstance.id}: decoder t=${prevInstance.decoder.total}, c=${prevInstance.decoder.cursor}`)
prevInstance.once('drain', () => {
console.log(`prevInstancee id=${prevInstance.id}: drained`)
// prevInstance.end()
})
if (offset === 0) {
// just begin normal parsing
this.subtitleTracks = prevInstance.subtitleTracks || new Map()
this.timecodeScale = prevInstance.timecodeScale || 1
this.cues = prevInstance.cues
this.decoder.on('data', _onMetaData.bind(this))
return
}
// copy previous metadata
this.subtitleTracks = prevInstance.subtitleTracks
this.timecodeScale = prevInstance.timecodeScale
this.cues = prevInstance.cues
this.decoder.on('data', _onMetaData.bind(this))
return
}
// copy previous metadata
this.subtitleTracks = prevInstance.subtitleTracks
this.timecodeScale = prevInstance.timecodeScale
this.cues = prevInstance.cues
if (!this.cues) {
this.decoder = null
return console.warn('No cues was parsed. Subtitle parsing disabled.')
}
// find a cue that's close to the file offset
// const cueArray = Uint32Array.from(this.cues.positions)
// cueArray.sort()
const cueArray = Array.from(this.cues.positions)
cueArray.sort((a, b) => a - b)
const closestCue = cueArray.find(i => i >= offset)
if (closestCue != null) {
// prepare to skip file stream until we hit a cue position
this.skip = closestCue - offset
// set internal decoder position to output consistent file offsets
this.decoder.total = closestCue
// console.log('using cue:', closestCue)
this.decoder.on('data', _onMetaData.bind(this))
} else {
this.decoder = null
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
}
} else {
if (offset) {
this.decoder = null
console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
return
}
this.subtitleTracks = new Map()
this.timecodeScale = 1
this.decoder.on('data', _onMetaData.bind(this))
}
function _onMetaData (chunk) {
if (waitForNext) {
waitForNext = false
// Keep cues if this is the same segment
if (!this.cues) {
this.cues = { start: chunk[1].start, positions: new Set() }
} else if (this.cues.start !== chunk[1].start) {
this.cues = { start: chunk[1].start, positions: new Set() }
console.warn('New segment found - resetting cues! Not sure we can handle this!?')
this.decoder = null
// TODO: AVOID THIS! - we can actually add seek points without the segment start (decoder.total), but this would cause problems later
return console.warn('No cues was parsed. Subtitle parsing disabled.')
}
// TODO: should always be the case, but we currently allow some slack (initial instance not at z=0)
if (prevInstance.decoder) {
// use the position of the previous decoder as a valid seek point
// this can help if offset is changed before parsing seeks and cues
const decoderPosition = prevInstance.decoder.total - prevInstance.decoder.cursor
this.cues.positions.add(decoderPosition)
}
// find a cue that's close to the file offset
// const cueArray = Uint32Array.from(this.cues.positions)
// cueArray.sort()
const cueArray = Array.from(this.cues.positions)
cueArray.sort((a, b) => a - b)
const closestCue = cueArray.find(i => i >= offset)
if (closestCue != null) {
// prepare to skip file stream until we hit a cue position
this.skip = closestCue - offset
// set internal decoder position to output consistent file offsets
this.decoder.total = closestCue
// console.log('using cue:', closestCue)
this.decoder.on('data', _onMetaData.bind(this))
} else {
console.info('Saw first segment again. Keeping cues.')
this.decoder = null
console.warn(`No cues for offset ${offset}. Subtitle parsing disabled.`)
}
}
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
// TODO: only record first segment?
// TODO: find a simpler way to do this
waitForNext = true
}
if (chunk[1].name === 'SeekID') {
// TODO: .value is undefined for some reason?
currentSeekID = chunk[1].data
}
if (currentSeekID && chunk[1].name === 'SeekPosition') {
//if (CUES_ID.equals(currentSeekID)) {
// hack: this is not a cue position, but the position to the cue data itself,
// in case it's not located at the beginning of the file.
// actually, just add all seek positions.
this.cues.positions.add(this.cues.start + chunk[1].value)
//}
}
if (chunk[1].name === 'CueClusterPosition') {
this.cues.positions.add(this.cues.start + chunk[1].value)
}
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
this.emit('cues')
}
// Segment Information
if (chunk[1].name === 'TimecodeScale') {
this.timecodeScale = readElement(chunk[1]) / 1000000
}
// Tracks
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
currentTrack = {}
}
if (currentTrack && chunk[0] === 'tag') {
// save info about track currently being scanned
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
currentTrack[chunk[1].name] = readElement(chunk[1])
}
}
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
if (currentTrack.TrackType === 0x11) { // Subtitle Track
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
const track = {
number: currentTrack.TrackNumber,
language: currentTrack.Language,
type: currentTrack.CodecID.substring(7).toLowerCase()
}
if (currentTrack.CodecPrivate) {
// only SSA/ASS
track.header = currentTrack.CodecPrivate.toString('utf8')
}
this.subtitleTracks.set(currentTrack.TrackNumber, track)
}
}
currentTrack = null
}
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
// this.decoder.removeListener('data', _onMetaData)
// if (this.subtitleTracks.size <= 0) return this.end()
// this.decoder.on('data', _onClusterData)
this.emit('tracks', Array.from(this.subtitleTracks.values()))
}
// }
// function _onClusterData (chunk) {
// TODO: assuming this is a Cluster `Timecode`
if (chunk[1].name === 'Timecode') {
currentClusterTimecode = readElement(chunk[1])
}
if (chunk[1].name === 'Block') {
const block = ebmlBlock(chunk[1].data)
if (this.subtitleTracks.has(block.trackNumber)) {
const type = this.subtitleTracks.get(block.trackNumber).type
const subtitle = {
text: block.frames[0].toString('utf8'),
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
}
if (type === 'ass' || type === 'ssa') {
// extract SSA/ASS keys
const values = subtitle.text.split(',')
// ignore read-order, and skip layer if ssa
let i = type === 'ssa' ? 2 : 1
for (; i < 9; i++) {
subtitle[ASS_KEYS[i]] = values[i]
}
// re-append extra text that might have been split
for (i = 9; i < values.length; i++) {
subtitle.text += ',' + values[i]
}
}
currentSubtitleBlock = [subtitle, block.trackNumber]
}
}
// TODO: assuming `BlockDuration` exists and always comes after `Block`
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
this.emit('subtitle', ...currentSubtitleBlock)
currentSubtitleBlock = null
}
}
}
_transform (chunk, _, callback) {
if (!this.decoder) return callback(null, chunk)
if (this.skip) {
// skip bytes to reach cue position
if (this.skip < chunk.length) {
// slice chunk
const sc = chunk.slice(this.skip)
this.skip = 0
this.decoder.write(sc)
} else {
// skip entire chunk
this.skip -= chunk.length
if (offset) {
this.decoder = null
console.error(`Offset is ${offset}, and must be 0 for initial instance. Subtitle parsing disabled.`)
return
}
this.subtitleTracks = new Map()
this.timecodeScale = 1
this.decoder.on('data', _onMetaData.bind(this))
}
// Handler for the EBML decoder's 'data' events (registered with .bind(this),
// so `this` is the MatroskaSubtitles instance).
//
// `chunk` is read as a pair: chunk[0] is compared against 'start' / 'tag' /
// 'end', and chunk[1] is an element object whose `name`, `start`, `data` and
// `value` fields are used below.
//
// Depending on the element name this updates `this.cues`,
// `this.timecodeScale` and `this.subtitleTracks`, and emits 'cues', 'tracks'
// and 'subtitle' events. State that spans several calls lives in the
// enclosing closure: waitForNext, currentSeekID, currentTrack,
// currentClusterTimecode and currentSubtitleBlock.
function _onMetaData (chunk) {
// waitForNext was set by the previous call when a Segment 'start' was seen,
// so this chunk is the first one delivered after it. Its `start` becomes the
// base that cue/seek positions are added to.
// NOTE(review): presumably this is the offset of the segment payload - confirm
// against the ebml decoder's output.
if (waitForNext) {
waitForNext = false
// Keep cues if this is the same segment
if (!this.cues) {
this.cues = { start: chunk[1].start, positions: new Set() }
} else if (this.cues.start !== chunk[1].start) {
this.cues = { start: chunk[1].start, positions: new Set() }
console.warn('New segment found - resetting cues! Not sure we can handle this!?')
} else {
console.info('Saw first segment again. Keeping cues.')
}
}
if (chunk[0] === 'start' && chunk[1].name === 'Segment') {
// TODO: only record first segment?
// TODO: find a simpler way to do this
waitForNext = true
}
if (chunk[1].name === 'SeekID') {
// TODO: .value is undefined for some reason?
currentSeekID = chunk[1].data
}
// Every SeekPosition seen after any SeekID is recorded; the ID itself is no
// longer compared (the CUES_ID check is commented out).
// NOTE(review): this.cues is dereferenced without a guard - it is only set in
// the waitForNext branch above or copied from a previous instance.
if (currentSeekID && chunk[1].name === 'SeekPosition') {
//if (CUES_ID.equals(currentSeekID)) {
// hack: this is not a cue position, but the position to the cue data itself,
// in case it's not located at the beginning of the file.
// actually, just add all seek positions.
this.cues.positions.add(this.cues.start + chunk[1].value)
//}
}
// Cue positions are stored as this.cues.start + the element's value.
if (chunk[1].name === 'CueClusterPosition') {
this.cues.positions.add(this.cues.start + chunk[1].value)
}
if (chunk[0] === 'end' && chunk[1].name === 'Cues') {
this.emit('cues')
}
// Segment Information
// NOTE(review): dividing by 1000000 presumably converts a nanosecond scale to
// milliseconds - confirm against the Matroska spec.
if (chunk[1].name === 'TimecodeScale') {
this.timecodeScale = readElement(chunk[1]) / 1000000
}
// Tracks
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
currentTrack = {}
}
if (currentTrack && chunk[0] === 'tag') {
// save info about track currently being scanned
if (TRACK_ELEMENTS.includes(chunk[1].name)) {
currentTrack[chunk[1].name] = readElement(chunk[1])
}
}
// NOTE(review): currentTrack is dereferenced without a null check, so a
// TrackEntry 'end' seen without its 'start' would throw here.
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
if (currentTrack.TrackType === 0x11) { // Subtitle Track
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
const track = {
number: currentTrack.TrackNumber,
language: currentTrack.Language,
// drops the 7-character 'S_TEXT/' prefix, leaving 'utf8', 'ssa' or 'ass'
type: currentTrack.CodecID.substring(7).toLowerCase()
}
if (currentTrack.CodecPrivate) {
// only SSA/ASS
track.header = currentTrack.CodecPrivate.toString('utf8')
}
this.subtitleTracks.set(currentTrack.TrackNumber, track)
}
}
currentTrack = null
}
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
// this.decoder.removeListener('data', _onMetaData)
// if (this.subtitleTracks.size <= 0) return this.end()
// this.decoder.on('data', _onClusterData)
this.emit('tracks', Array.from(this.subtitleTracks.values()))
}
// }
// function _onClusterData (chunk) {
// TODO: assuming this is a Cluster `Timecode`
if (chunk[1].name === 'Timecode') {
currentClusterTimecode = readElement(chunk[1])
}
// Blocks are decoded with ebmlBlock; only blocks whose trackNumber is a known
// subtitle track are turned into subtitle objects.
if (chunk[1].name === 'Block') {
const block = ebmlBlock(chunk[1].data)
if (this.subtitleTracks.has(block.trackNumber)) {
const type = this.subtitleTracks.get(block.trackNumber).type
const subtitle = {
text: block.frames[0].toString('utf8'),
time: (block.timecode + currentClusterTimecode) * this.timecodeScale
}
if (type === 'ass' || type === 'ssa') {
// extract SSA/ASS keys
const values = subtitle.text.split(',')
// ignore read-order, and skip layer if ssa
let i = type === 'ssa' ? 2 : 1
// ASS_KEYS[8] is 'text', so the last pass replaces subtitle.text with values[8]
for (; i < 9; i++) {
subtitle[ASS_KEYS[i]] = values[i]
}
// re-append extra text that might have been split
for (i = 9; i < values.length; i++) {
subtitle.text += ',' + values[i]
}
}
// held back until the matching BlockDuration arrives (see below)
currentSubtitleBlock = [subtitle, block.trackNumber]
}
}
// TODO: assuming `BlockDuration` exists and always comes after `Block`
// The pending subtitle is emitted as ('subtitle', subtitle, trackNumber) once
// its duration is known, then cleared.
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
currentSubtitleBlock[0].duration = readElement(chunk[1]) * this.timecodeScale
this.emit('subtitle', ...currentSubtitleBlock)
currentSubtitleBlock = null
}
}
} else {
this.decoder.write(chunk)
}
callback(null, chunk)
_transform (chunk, _, callback) {
console.log(`Write id=${this.id}: z=${this.offset}, l=${chunk.length}, skip=${this.skip} pos=${(this.offset || 0) + this.bcount}`)
this.bcount += chunk.length
if (!this.decoder) {
console.warn('Skipped decoder')
return callback(null, chunk)
}
if (this.skip) {
if (this.skip > 1048576 * 20) {
console.warn(this.id, 'High skip value... This is bad.')
}
// skip bytes to reach cue position
if (this.skip < chunk.length) {
// slice chunk
const sc = chunk.slice(this.skip)
this.skip = 0
this.decoder.write(sc)
} else {
// skip entire chunk
this.skip -= chunk.length
}
} else {
this.decoder.write(chunk)
}
callback(null, chunk)
}
}
}
module.exports = MatroskaSubtitles
module.exports = MatroskaSubtitles
}).call(this,require("buffer").Buffer)
},{"./lib/read-element":15,"buffer":3,"ebml":12,"ebml-block":9,"readable-stream":31}],15:[function(require,module,exports){

View file

@ -9,7 +9,6 @@ function parseSubs(range, stream) {
parser = new MatroskaSubtitles({ prevInstance: parser, offset: range.start })
parser.once('tracks', function (pTracks) {
console.log(pTracks)
tracks = []
pTracks.forEach(track => {
tracks[track.number] = video.addTextTrack('captions', track.type, track.language || track.number)