#flv.py #(C) kousue@gmail.com 2007. #an FLV-reading library #writing might come later #based on http://www.peldi.com/fmswiki/index.php?title=FLVParser_-_Split_an_FLV_into_pieces #see also http://en.wikipedia.org/wiki/Action_Message_Format ## Redistribution and use of this software in source and binary forms, ## with or without modification, are ## permitted provided that the following conditions are met: ## ##* Redistributions of source code must retain the above ## copyright notice, this list of conditions and the ## following disclaimer. ## ##* Redistributions in binary or packaged form must reproduce ## the above copyright notice, this list of conditions and the ## following disclaimer in the documentation and/or other ## materials provided with the distribution. #THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A #PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR #ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT #LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS #INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR #TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #XXX a big problem with this library is that it assumes the file you give it is in blocking mode (so that it gets all the data at once) import struct #XXX is there anything for python that parses C-structs and wraps them in struct.py calls? #--probably not, most .. XXX duh, yes it's already *there* in the struct module. Failure. #well them, how about one that reads C structs and rewrites them as struct.py format strings? #also, another problem is that the way C aligns struct members is not entirely clear, and it's concievable that different Cs could do it differently def parse_flv_header(s): def parse_header_flags(flags): return {"reserved": bool(flags & 0xf0), "audio": bool(flags & 0x04), "video": bool(flags & 0x01), #...? &0x8? #...? &0x2? } Signature, Version, Flags, DataOffset = struct.unpack(">3s B B I xxx", s) Flags = parse_header_flags(Flags) return Signature, Version, Flags, DataOffset def extract_n_byte_int(format_str, data): return reduce(lambda a,b: (a<<8)|b, struct.unpack(format_str, data)) def parse_tag_header(s): type, datasize, timestamp, reserved = struct.unpack(">B 3s 3s I", s) type = {8: "audio", 9: "video", 18: "metadata", 0: "undefined"}.get(type, "unknown") datasize = extract_n_byte_int(">3B", datasize) #XXX are these in the write direction?? timestamp = extract_n_byte_int(">3B", timestamp) return {'type': type, 'datasize': datasize, 'timestamp': timestamp, 'reserved': reserved} #take a binary stream and give back a list of all it's tags def decode_chunks(flv): tag, data = {}, "" while 1: prev_tag_size = flv.read(4) if len(prev_tag_size)==4: prev_tag_size = struct.unpack(">I", prev_tag_size) #why is this even used? to provide some sort of streaming feature probably... #assert prev_tag_size == len(data) + 11 #make sure that the tag = flv.read(11) if len(tag)==0: break elif len(tag)!=11: raise Exception("Parse error: FLV tag ends early (truncated file?)") tag = parse_tag_header(tag) data = flv.read(tag['datasize']) print tag #print repr(data) if len(data)!=tag['datasize']: raise Exception("Parse error: FLV data section is too short :/; got %d bytes, wanted %d" % (len(data), tag['datasize'])) #NB> data can't be too long, since the argument to read() is a -maximum- if tag['type']=="audio": #reference: http://search.cpan.org/src/CLOTHO/FLV-Info-0.17/lib/FLV/AudioTag.pm type_header = ord(data[0]) format = {0:"uncompressed", 1:"ADPCM", 2:"mp3", 5:"Nellymoser 8kHz mono", 6:"Nellymoser"}.get((type_header & 0xf0) >> 4, "unknown") rate = {0: '5518 Hz', 1: '11025 Hz', 2: '22050 Hz', 3: '44100 Hz',}.get((type_header & 0x0c) >> 2, "unknown") size = {0:"8 bit", 1:"16 bit"}.get((type_header & 0x02) >> 1, "unknown") type = {0:"mono", 1:"stereo"}.get((type_header & 0x01) >> 0, "unknown") tag['audio_header'] = {'format': format, 'rate': rate, 'size': size, 'type': type} data = data[1:] #chop off the audio info byte elif tag['type']=="video": #reference: http://search.cpan.org/src/CLOTHO/FLV-Info-0.17/lib/FLV/VideoTag.pm type_header = ord(data[0]) type = {1:"keyframe", 2:"interframe", 3:"disposable interframe"}.get((type_header & 0xf0) >> 4, "unknown") codec = {2: 'Sorenson H.263', 3: 'Screen video', 4: 'On2 VP6', 5: 'On2 VP6 + alpha', 6: 'Screen video v2',}.get((type_header & 0x0f) >> 0, "unknown") tag['video_header'] = {'type': type, 'codec': codec} data = data[1:] #chop off the byte elif tag['type']=="metadata": #actionscript-message-format encoded metadata try: import pyamf data = list(pyamf.decode(data)) #XXX whooo hack except ImportError: data = data print flv.tell() yield tag, data #take a binary flv and give back (header, chunks) def decode(flv): header = flv.read(12) assert len(header) == 12 signature, version, flags, offset = header = parse_flv_header(header) if signature != "FLV": raise ValueError("Stream does not have an FLV header") if version != 1: raise ValueError("Only know FLV 1, not %i" % version) #XXX yes? flv.seek(offset) chunks = decode_chunks(flv) return header, chunks def encode(flv): raise NotImplementedError if __name__ == '__main__': import sys if len(sys.argv) < 1: print "usage: bitch bitch bitch" f = sys.argv[1] a,g= decode(open(f, "rb")) print a import itertools audio_stream = (burst for burst in g if burst[0]['type']=='audio') o = open(f[:-4]+".mp3","wb") for i, (tag, bytes) in enumerate(audio_stream): print "[%i] %s ^^^^^^^^ %r" % (i, tag, bytes) o.write(bytes) #raw_input() o.close()