Spaces:
Running
Running
| // Binary parsers for xorb and shard files | |
| import type { | |
| ParsedFileMetadata, | |
| Chunk, | |
| ChunkHeader, | |
| ShardData, | |
| MerkleHash, | |
| MDBShardFileHeader, | |
| MDBShardFileFooter, | |
| FileDataSequenceHeader, | |
| FileDataSequenceEntry, | |
| FileVerificationEntry, | |
| FileMetadataExt, | |
| CASChunkSequenceHeader, | |
| CASChunkSequenceEntry, | |
| MDBFileInfo, | |
| MDBCASInfo, | |
| } from "./types.js"; | |
| import { MDB_SHARD_HEADER_TAG, XORB_IDENT } from "./types.js"; | |
| export class BinaryReader { | |
| private data: Uint8Array; | |
| private offset: number = 0; | |
| constructor(data: Uint8Array) { | |
| this.data = data; | |
| } | |
| readUint8(): number { | |
| if (this.offset >= this.data.length) { | |
| console.trace(); | |
| throw new Error("Unexpected end of data"); | |
| } | |
| return this.data[this.offset++]; | |
| } | |
| readUint32LE(): number { | |
| if (this.offset + 4 > this.data.length) { | |
| console.trace(); | |
| throw new Error("Unexpected end of data"); | |
| } | |
| const result = new DataView(this.data.buffer).getUint32(this.offset, true); | |
| this.offset += 4; | |
| return result; | |
| } | |
| readUint64LE(): bigint { | |
| if (this.offset + 8 > this.data.length) { | |
| console.trace(); | |
| throw new Error("Unexpected end of data"); | |
| } | |
| const result = new DataView(this.data.buffer).getBigUint64( | |
| this.offset, | |
| true | |
| ); | |
| this.offset += 8; | |
| return result; | |
| } | |
| readBytes(length: number): Uint8Array { | |
| if (this.offset + length > this.data.length) { | |
| console.trace(); | |
| throw new Error("Unexpected end of data"); | |
| } | |
| const result = this.data.slice(this.offset, this.offset + length); | |
| this.offset += length; | |
| return result; | |
| } | |
| readHash(): MerkleHash { | |
| const data = this.readBytes(32); | |
| // const u64_0 = this.readUint64LE(); | |
| // const u64_1 = this.readUint64LE(); | |
| // const u64_2 = this.readUint64LE(); | |
| // const u64_3 = this.readUint64LE(); | |
| return { data }; | |
| // return { data: [u64_0, u64_1, u64_2, u64_3] }; | |
| } | |
| readString(length: number): string { | |
| const bytes = this.readBytes(length); | |
| return new TextDecoder().decode(bytes); | |
| } | |
| seek(position: number): void { | |
| this.offset = position; | |
| } | |
| seekFromEnd(offsetFromEnd: number): void { | |
| this.offset = this.data.length - offsetFromEnd; | |
| } | |
| get position(): number { | |
| return this.offset; | |
| } | |
| get remaining(): number { | |
| return this.data.length - this.offset; | |
| } | |
| } | |
| function arraysEqual(a: Uint8Array, b: Uint8Array): boolean { | |
| if (a.length !== b.length) return false; | |
| for (let i = 0; i < a.length; i++) { | |
| if (a[i] !== b[i]) return false; | |
| } | |
| return true; | |
| } | |
| function isBookendHash(hash: MerkleHash): boolean { | |
| // Bookend hash is all 0xFF bytes (all 64-bit values should be 0xFFFFFFFFFFFFFFFF) | |
| return hash.data.every((value) => value === 0xff); | |
| // return hash.data.every((value) => value === 0xffffffffffffffffn); | |
| } | |
| export function formatHash(hash: MerkleHash): string { | |
| // For every 8 bytes in hash.data, flip the order of the 8 bytes, | |
| // concatenate each new flipped slice, then convert the result array to a hexadecimal string. | |
| if (!hash || !hash.data || hash.data.length !== 32) return ""; | |
| const flipped = new Uint8Array(32); | |
| for (let i = 0; i < 4; i++) { | |
| const start = i * 8; | |
| for (let j = 0; j < 8; j++) { | |
| flipped[start + j] = hash.data[start + (7 - j)]; | |
| } | |
| } | |
| // Convert to hex string | |
| return Array.from(flipped) | |
| .map((b) => b.toString(16).padStart(2, "0")) | |
| .join(""); | |
| return ""; | |
| } | |
| // File type detection removed - type is now specified by user selection | |
| function parseXorbFile(data: Uint8Array): Chunk[] { | |
| const reader = new BinaryReader(data); | |
| const chunks: Chunk[] = []; | |
| while (reader.remaining > 0) { | |
| // Check if we have enough bytes for a header | |
| if (reader.remaining < 8) { | |
| console.error("Unexpected end of data parsing xorb file"); | |
| break; | |
| } | |
| const header_bytes = reader.readBytes(8); | |
| let is_xorb_ident = true; | |
| // Urgh how do I compare two Uint8Arrays? | |
| for (let i = 0; i < 7; i++) { | |
| if (header_bytes[i] !== XORB_IDENT[i]) { | |
| is_xorb_ident = false; | |
| break; | |
| } | |
| } | |
| if (is_xorb_ident) { | |
| // reached optional xorb footer, skip rest | |
| break; | |
| } | |
| const header = new DataView(header_bytes.buffer); | |
| const version = header.getUint8(0); | |
| const compressed_size = | |
| header.getUint8(1) | | |
| (header.getUint8(2) << 8) | | |
| (header.getUint8(3) << 16); | |
| const compression_type = header.getUint8(4); | |
| const uncompressed_size = | |
| header.getUint8(5) | | |
| (header.getUint8(6) << 8) | | |
| (header.getUint8(7) << 16); | |
| const chunkHeader: ChunkHeader = { | |
| version, | |
| compressed_size, | |
| compression_type, | |
| uncompressed_size, | |
| }; | |
| const compressed_data = reader.readBytes(compressed_size); | |
| chunks.push({ header: chunkHeader, compressed_data }); | |
| } | |
| return chunks; | |
| } | |
// Parse an MDB shard file: a fixed 48-byte header (32-byte tag + two u64s),
// a footer located `footer_size` bytes from the end of the file, then the
// file-info and CAS-info sections at the offsets recorded in the footer.
function parseShardFile(data: Uint8Array): ShardData {
  const reader = new BinaryReader(data);
  // Parse header: 32-byte magic tag, then version and footer_size as u64 LE.
  const tag = reader.readBytes(32);
  if (!arraysEqual(tag, MDB_SHARD_HEADER_TAG)) {
    throw new Error("Invalid shard file header tag");
  }
  const header: MDBShardFileHeader = {
    tag,
    version: Number(reader.readUint64LE()),
    footer_size: Number(reader.readUint64LE()),
  };
  if (header.version !== 2) {
    throw new Error(`Unsupported shard header version: ${header.version}`);
  }
  // Parse footer (from end of file) — the footer is the last
  // `header.footer_size` bytes.
  reader.seekFromEnd(header.footer_size);
  const version = Number(reader.readUint64LE());
  const file_info_offset = Number(reader.readUint64LE());
  const cas_info_offset = Number(reader.readUint64LE());
  // Skip first buffer (48 bytes) — presumably reserved/unused footer fields;
  // TODO confirm against the shard format definition.
  reader.readBytes(48);
  const chunk_hash_hmac_key = reader.readHash();
  const shard_creation_timestamp = Number(reader.readUint64LE());
  const shard_key_expiry = Number(reader.readUint64LE());
  // Skip second buffer (72 bytes) — same caveat as above.
  reader.readBytes(72);
  const footer_offset = Number(reader.readUint64LE());
  const footer: MDBShardFileFooter = {
    version,
    file_info_offset,
    cas_info_offset,
    chunk_hash_hmac_key,
    shard_creation_timestamp,
    shard_key_expiry,
    footer_offset,
  };
  if (footer.version !== 1) {
    throw new Error(`Unsupported shard footer version: ${footer.version}`);
  }
  // Parse file info section: a sequence of FileDataSequenceHeader records,
  // each followed by its entries and optional per-flag trailers, terminated
  // by a bookend hash (or by reaching cas_info_offset).
  const file_info: MDBFileInfo[] = [];
  reader.seek(footer.file_info_offset);
  while (reader.position < footer.cas_info_offset) {
    const pos = reader.position;
    const file_hash = reader.readHash();
    // Check for bookend
    if (isBookendHash(file_hash)) {
      reader.readBytes(16); // unused
      break;
    }
    const file_flags = reader.readUint32LE();
    const num_entries = reader.readUint32LE();
    const _unused = reader.readBytes(8);
    const header: FileDataSequenceHeader = {
      file_hash,
      file_flags,
      num_entries,
      _unused,
    };
    // Read entries — one 48-byte FileDataSequenceEntry per chunk range.
    const entries: FileDataSequenceEntry[] = [];
    for (let i = 0; i < num_entries; i++) {
      const cas_hash = reader.readHash();
      const cas_flags = reader.readUint32LE();
      const unpacked_segment_bytes = reader.readUint32LE();
      const chunk_index_start = reader.readUint32LE();
      const chunk_index_end = reader.readUint32LE();
      entries.push({
        cas_hash,
        cas_flags,
        unpacked_segment_bytes,
        chunk_index_start,
        chunk_index_end,
      });
    }
    // Read verification entries if present (flag bit 0x80000000):
    // one 48-byte entry per data entry.
    let verification_entries: FileVerificationEntry[] | undefined;
    if (file_flags & 0x80000000) {
      verification_entries = [];
      for (let i = 0; i < num_entries; i++) {
        verification_entries.push({
          chunk_hash: reader.readHash(),
          _unused: reader.readBytes(16),
        });
      }
    }
    // Read metadata extension if present (flag bit 0x40000000): sha256 + pad.
    let metadata_ext: FileMetadataExt | undefined;
    if (file_flags & 0x40000000) {
      metadata_ext = {
        sha256: reader.readHash(),
        _unused: reader.readBytes(16),
      };
    }
    file_info.push({
      header,
      entries,
      verification_entries,
      metadata_ext,
    });
  }
  // Parse CAS info section: CASChunkSequenceHeader records each followed by
  // their chunk entries, terminated by a bookend hash (or footer_offset).
  // NOTE(review): the file-info bookend consumes 16 trailing bytes but this
  // one does not — confirm that asymmetry is intentional.
  const cas_info: MDBCASInfo[] = [];
  reader.seek(footer.cas_info_offset);
  while (reader.position < footer.footer_offset) {
    const cas_hash = reader.readHash();
    // Check for bookend
    if (isBookendHash(cas_hash)) {
      break;
    }
    const cas_flags = reader.readUint32LE();
    const num_entries = reader.readUint32LE();
    const num_bytes_in_cas = reader.readUint32LE();
    const num_bytes_on_disk = reader.readUint32LE();
    const header: CASChunkSequenceHeader = {
      cas_hash,
      cas_flags,
      num_entries,
      num_bytes_in_cas,
      num_bytes_on_disk,
    };
    // Read entries — hash + byte range start + unpacked size + 8 unused bytes.
    const entries: CASChunkSequenceEntry[] = [];
    for (let i = 0; i < num_entries; i++) {
      entries.push({
        chunk_hash: reader.readHash(),
        chunk_byte_range_start: reader.readUint32LE(),
        unpacked_segment_bytes: reader.readUint32LE(),
        _unused: Number(reader.readUint64LE()),
      });
    }
    cas_info.push({
      header,
      entries,
    });
  }
  return {
    header,
    footer,
    file_info,
    cas_info,
  };
}
| export async function parseFile( | |
| file: File, | |
| fileType: "xorb" | "shard" | |
| ): Promise<ParsedFileMetadata> { | |
| try { | |
| const arrayBuffer = await file.arrayBuffer(); | |
| const data = new Uint8Array(arrayBuffer); | |
| let parsedData: Chunk[] | ShardData; | |
| if (fileType === "xorb") { | |
| parsedData = parseXorbFile(data); | |
| } else { | |
| parsedData = parseShardFile(data); | |
| } | |
| return { | |
| type: fileType, | |
| filename: file.name, | |
| fileSize: file.size, | |
| data: parsedData, | |
| }; | |
| } catch (error) { | |
| return { | |
| type: fileType, | |
| filename: file.name, | |
| fileSize: file.size, | |
| data: [] as any, | |
| error: error instanceof Error ? error.message : "Unknown error occurred", | |
| }; | |
| } | |
| } | |
| // Helper functions for displaying data | |
| export function formatBytes(bytes: number): string { | |
| if (bytes === 0) return "0 B"; | |
| const k = 1000; | |
| const sizes = ["B", "KB", "MB", "GB"]; | |
| const i = Math.floor(Math.log(bytes) / Math.log(k)); | |
| return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + " " + sizes[i]; | |
| } | |
| export function formatTimestamp(timestamp: number): string { | |
| return new Date(timestamp * 1000).toISOString(); | |
| } | |
| export function formatHashShort(hash: MerkleHash): string { | |
| const fullHash = formatHash(hash); | |
| return fullHash.substring(0, 16) + "..."; | |
| } | |