## ffmpeg / doc / snow.txt @ 0eb4ff9e

History | View | Annotate | Download (20.9 KB)

1 | 78954a05 | Michael Niedermayer | ============================================= |
---|---|---|---|

2 | ead6ebdd | Mike Melanson | Snow Video Codec Specification Draft 20080110 |

3 | 78954a05 | Michael Niedermayer | ============================================= |

4 | |||

5 | c567c694 | Mike Melanson | Introduction: |

6 | ============= |
||

7 | ead6ebdd | Mike Melanson | This specification describes the Snow bitstream syntax and semantics as |

8 | well as the formal Snow decoding process. |
||

9 | c567c694 | Mike Melanson | |

10 | The decoding process is described precisely and any compliant decoder |
||

11 | ead6ebdd | Mike Melanson | MUST produce the exact same output for a spec-conformant Snow stream. |

12 | c567c694 | Mike Melanson | For encoding, though, any process which generates a stream compliant to |

13 | the syntactical and semantic requirements and which is decodable by |
||

14 | 11de04d8 | Michael Niedermayer | the process described in this spec shall be considered a conformant |

15 | ead6ebdd | Mike Melanson | Snow encoder. |

16 | 78954a05 | Michael Niedermayer | |

17 | Definitions: |
||

18 | ============ |
||

19 | |||

20 | MUST the specific part must be done to conform to this standard |
||

21 | SHOULD it is recommended to be done that way, but not strictly required |
||

22 | |||

23 | ilog2(x) is the rounded down logarithm of x with basis 2 |
||

24 | ilog2(0) = 0 |
||

25 | |||

26 | Type definitions: |
||

27 | ================= |
||

28 | |||

29 | b 1-bit range coded |
||

30 | u unsigned scalar value range coded |
||

31 | s signed scalar value range coded |
||

32 | |||

33 | |||

34 | Bitstream syntax: |
||

35 | ================= |
||

36 | |||

37 | frame: |
||

38 | header |
||

39 | prediction |
||

40 | residual |
||

41 | |||

42 | header: |
||

43 | keyframe b MID_STATE |
||

44 | if(keyframe || always_reset) |
||

45 | reset_contexts |
||

46 | if(keyframe){ |
||

47 | version u header_state |
||

48 | always_reset b header_state |
||

49 | temporal_decomposition_type u header_state |
||

50 | temporal_decomposition_count u header_state |
||

51 | spatial_decomposition_count u header_state |
||

52 | colorspace_type u header_state |
||

53 | chroma_h_shift u header_state |
||

54 | chroma_v_shift u header_state |
||

55 | spatial_scalability b header_state |
||

56 | max_ref_frames-1 u header_state |
||

57 | qlogs |
||

58 | } |
||

59 | e9314de6 | Michael Niedermayer | if(!keyframe){ |

60 | b85bf991 | Michael Niedermayer | update_mc b header_state |

61 | if(update_mc){ |
||

62 | e9314de6 | Michael Niedermayer | for(plane=0; plane<2; plane++){ |

63 | diag_mc b header_state |
||

64 | htaps/2-1 u header_state |
||

65 | for(i= p->htaps/2; i; i--) |
||

66 | |hcoeff[i]| u header_state |
||

67 | } |
||

68 | } |
||

69 | bc66275b | Michael Niedermayer | update_qlogs b header_state |

70 | if(update_qlogs){ |
||

71 | spatial_decomposition_count u header_state |
||

72 | qlogs |
||

73 | } |
||

74 | e9314de6 | Michael Niedermayer | } |

75 | 78954a05 | Michael Niedermayer | |

76 | spatial_decomposition_type s header_state |
||

77 | qlog s header_state |
||

78 | mv_scale s header_state |
||

79 | qbias s header_state |
||

80 | block_max_depth s header_state |
||

81 | |||

82 | qlogs: |
||

83 | for(plane=0; plane<2; plane++){ |
||

84 | quant_table[plane][0][0] s header_state |
||

85 | for(level=0; level < spatial_decomposition_count; level++){ |
||

86 | quant_table[plane][level][1]s header_state |
||

87 | quant_table[plane][level][3]s header_state |
||

88 | } |
||

89 | } |
||

90 | |||

91 | reset_contexts |
||

92 | *_state[*]= MID_STATE |
||

93 | |||

94 | prediction: |
||

95 | for(y=0; y<block_count_vertical; y++) |
||

96 | for(x=0; x<block_count_horizontal; x++) |
||

97 | block(0) |
||

98 | |||

99 | block(level): |
||

100 | c3922c65 | Michael Niedermayer | mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0 |

101 | 78954a05 | Michael Niedermayer | if(keyframe){ |

102 | intra=1 |
||

103 | }else{ |
||

104 | if(level!=max_block_depth){ |
||

105 | s_context= 2*left->level + 2*top->level + topleft->level + topright->level |
||

106 | leaf b block_state[4 + s_context] |
||

107 | } |
||

108 | if(level==max_block_depth || leaf){ |
||

109 | intra b block_state[1 + left->intra + top->intra] |
||

110 | if(intra){ |
||

111 | y_diff s block_state[32] |
||

112 | cb_diff s block_state[64] |
||

113 | cr_diff s block_state[96] |
||

114 | }else{ |
||

115 | ref_context= ilog2(2*left->ref) + ilog2(2*top->ref) |
||

116 | if(ref_frames > 1) |
||

117 | ref u block_state[128 + 1024 + 32*ref_context] |
||

118 | mx_context= ilog2(2*abs(left->mx - top->mx)) |
||

119 | my_context= ilog2(2*abs(left->my - top->my)) |
||

120 | mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)] |
||

121 | mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)] |
||

122 | } |
||

123 | }else{ |
||

124 | block(level+1) |
||

125 | block(level+1) |
||

126 | block(level+1) |
||

127 | block(level+1) |
||

128 | } |
||

129 | } |
||

130 | |||

131 | |||

132 | residual: |
||

133 | 48fe9238 | Michael Niedermayer | residual2(luma) |

134 | residual2(chroma_cr) |
||

135 | residual2(chroma_cb) |
||

136 | |||

137 | residual2: |
||

138 | for(level=0; level<spatial_decomposition_count; level++){ |
||

139 | if(level==0) |
||

140 | subband(LL, 0) |
||

141 | subband(HL, level) |
||

142 | subband(LH, level) |
||

143 | subband(HH, level) |
||

144 | } |
||

145 | |||

146 | subband: |
||

147 | 78954a05 | Michael Niedermayer | FIXME |

148 | |||

149 | |||

150 | |||

151 | Tag description: |
||

152 | ---------------- |
||

153 | |||

154 | version |
||

155 | 0 |
||

156 | this MUST NOT change within a bitstream |
||

157 | |||

158 | always_reset |
||

159 | if 1 then the range coder contexts will be reset after each frame |
||

160 | |||

161 | temporal_decomposition_type |
||

162 | 0 |
||

163 | |||

164 | temporal_decomposition_count |
||

165 | 0 |
||

166 | |||

167 | spatial_decomposition_count |
||

168 | FIXME |
||

169 | |||

170 | colorspace_type |
||

171 | 0 |
||

172 | this MUST NOT change within a bitstream |
||

173 | |||

174 | chroma_h_shift |
||

175 | log2(luma.width / chroma.width) |
||

176 | this MUST NOT change within a bitstream |
||

177 | |||

178 | chroma_v_shift |
||

179 | log2(luma.height / chroma.height) |
||

180 | this MUST NOT change within a bitstream |
||

181 | |||

182 | spatial_scalability |
||

183 | 0 |
||

184 | |||

185 | max_ref_frames |
||

186 | maximum number of reference frames |
||

187 | this MUST NOT change within a bitstream |
||

188 | |||

189 | e9314de6 | Michael Niedermayer | update_mc |

190 | indicates that motion compensation filter parameters are stored in the |
||

191 | header |
||

192 | |||

193 | diag_mc |
||

194 | flag to enable faster diagonal interpolation |
||

195 | this SHOULD be 1 unless it turns out to be covered by a valid patent |
||

196 | |||

197 | htaps |
||

198 | number of half pel interpolation filter taps, MUST be even, >0 and <10 |
||

199 | |||

200 | hcoeff |
||

201 | half pel interpolation filter coefficients, hcoeff[0] are the 2 middle |
||

202 | coefficients [1] are the next outer ones and so on, resulting in a filter |
||

203 | like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... |
||

204 | the sign of the coefficients is not explicitly stored but alternates |
||

205 | after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... |
||

206 | hcoeff[0] is not explicitly stored but found by subtracting the sum |
||

207 | of all stored coefficients with signs from 32 |
||

208 | hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ... |
||

209 | a good choice for hcoeff and htaps is |
||

210 | htaps= 6 |
||

211 | hcoeff={40,-10,2} |
||

212 | an alternative which requires more computations at both encoder and |
||

213 | decoder side and may or may not be better is |
||

214 | htaps= 8 |
||

215 | hcoeff={42,-14,6,-2} |
||

216 | |||

217 | |||

218 | 78954a05 | Michael Niedermayer | ref_frames |

219 | minimum of the number of available reference frames and max_ref_frames |
||

220 | for example the first frame after a key frame always has ref_frames=1 |
||

221 | |||

222 | spatial_decomposition_type |
||

223 | wavelet type |
||

224 | 0 is a 9/7 symmetric compact integer wavelet |
||

225 | 1 is a 5/3 symmetric compact integer wavelet |
||

226 | others are reserved |
||

227 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
||

228 | |||

229 | qlog |
||

230 | quality (logarthmic quantizer scale) |
||

231 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
||

232 | |||

233 | mv_scale |
||

234 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
||

235 | 24dbec7c | Luca Barbato | FIXME check that everything works fine if this changes between frames |

236 | 78954a05 | Michael Niedermayer | |

237 | qbias |
||

238 | dequantization bias |
||

239 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
||

240 | |||

241 | block_max_depth |
||

242 | maximum depth of the block tree |
||

243 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
||

244 | |||

245 | quant_table |
||

246 | quantiztation table |
||

247 | |||

248 | 8f39b74d | Michael Niedermayer | |

249 | Highlevel bitstream structure: |
||

250 | ============================= |
||

251 | -------------------------------------------- |
||

252 | | Header | |
||

253 | -------------------------------------------- |
||

254 | | ------------------------------------ | |
||

255 | | | Block0 | | |
||

256 | | | split? | | |
||

257 | | | yes no | | |
||

258 | | | ......... intra? | | |
||

259 | | | : Block01 : yes no | | |
||

260 | | | : Block02 : ....... .......... | | |
||

261 | | | : Block03 : : y DC : : ref index: | | |
||

262 | | | : Block04 : : cb DC : : motion x : | | |
||

263 | | | ......... : cr DC : : motion y : | | |
||

264 | | | ....... .......... | | |
||

265 | | ------------------------------------ | |
||

266 | | ------------------------------------ | |
||

267 | | | Block1 | | |
||

268 | | ... | |
||

269 | -------------------------------------------- |
||

270 | | ------------ ------------ ------------ | |
||

271 | || Y subbands | | Cb subbands| | Cr subbands|| |
||

272 | || --- --- | | --- --- | | --- --- || |
||

273 | || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| || |
||

274 | || --- --- | | --- --- | | --- --- || |
||

275 | || --- --- | | --- --- | | --- --- || |
||

276 | || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| || |
||

277 | || --- --- | | --- --- | | --- --- || |
||

278 | || --- --- | | --- --- | | --- --- || |
||

279 | || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| || |
||

280 | || --- --- | | --- --- | | --- --- || |
||

281 | || --- --- | | --- --- | | --- --- || |
||

282 | || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| || |
||

283 | || ... | | ... | | ... || |
||

284 | | ------------ ------------ ------------ | |
||

285 | -------------------------------------------- |
||

286 | |||

287 | Decoding process: |
||

288 | ================= |
||

289 | |||

290 | ------------ |
||

291 | | | |
||

292 | | Subbands | |
||

293 | ------------ | | |
||

294 | | | ------------ |
||

295 | | Intra DC | | |
||

296 | | | LL0 subband prediction |
||

297 | ------------ | |
||

298 | \ Dequantizaton |
||

299 | ------------------- \ | |
||

300 | | Reference frames | \ IDWT |
||

301 | | ------- ------- | Motion \ | |
||

302 | ||Frame 0| |Frame 1|| Compensation . OBMC v ------- |
||

303 | | ------- ------- | --------------. \------> + --->|Frame n|-->output |
||

304 | | ------- ------- | ------- |
||

305 | ||Frame 2| |Frame 3||<----------------------------------/ |
||

306 | | ... | |
||

307 | ------------------- |
||

308 | |||

309 | |||

310 | 78954a05 | Michael Niedermayer | Range Coder: |

311 | ============ |
||

312 | 59edca9a | Michael Niedermayer | |

313 | Binary Range Coder: |
||

314 | ------------------- |
||

315 | e5635270 | lars.taeuber@web.de | The implemented range coder is an adapted version based upon "Range encoding: |

316 | an algorithm for removing redundancy from a digitised message." by G. N. N. |
||

317 | Martin. |
||

318 | ead6ebdd | Mike Melanson | The symbols encoded by the Snow range coder are bits (0|1). The |

319 | e5635270 | lars.taeuber@web.de | associated probabilities are not fix but change depending on the symbol mix |

320 | seen so far. |
||

321 | |||

322 | 78954a05 | Michael Niedermayer | |

323 | ca087dbc | Michael Niedermayer | bit seen | new state |

324 | 762f804c | Lars TÃ¤uber | ---------+----------------------------------------------- |

325 | ca087dbc | Michael Niedermayer | 0 | 256 - state_transition_table[256 - old_state]; |

326 | 1 | state_transition_table[ old_state]; |
||

327 | |||

328 | state_transition_table = { |
||

329 | 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, |
||

330 | 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, |
||

331 | 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, |
||

332 | 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, |
||

333 | 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, |
||

334 | 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, |
||

335 | 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, |
||

336 | 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, |
||

337 | 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, |
||

338 | 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, |
||

339 | 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, |
||

340 | 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, |
||

341 | 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, |
||

342 | 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, |
||

343 | 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, |
||

344 | 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0}; |
||

345 | |||

346 | 59edca9a | Michael Niedermayer | FIXME |

347 | |||

348 | |||

349 | aa6b38c2 | Diego Biurrun | Range Coding of integers: |

350 | 762f804c | Lars TÃ¤uber | ------------------------- |

351 | 59edca9a | Michael Niedermayer | FIXME |

352 | |||

353 | ca087dbc | Michael Niedermayer | |

354 | 78954a05 | Michael Niedermayer | Neighboring Blocks: |

355 | =================== |
||

356 | left and top are set to the respective blocks unless they are outside of |
||

357 | the image in which case they are set to the Null block |
||

358 | |||

359 | 90b5b51e | Diego Biurrun | top-left is set to the top left block unless it is outside of the image in |

360 | 78954a05 | Michael Niedermayer | which case it is set to the left block |

361 | |||

362 | 90b5b51e | Diego Biurrun | if this block has no larger parent block or it is at the left side of its |

363 | 78954a05 | Michael Niedermayer | parent block and the top right block is not outside of the image then the |

364 | top right block is used for top-right else the top-left block is used |
||

365 | |||

366 | Null block |
||

367 | y,cb,cr are 128 |
||

368 | level, ref, mx and my are 0 |
||

369 | |||

370 | |||

371 | Motion Vector Prediction: |
||

372 | ========================= |
||

373 | 1. the motion vectors of all the neighboring blocks are scaled to |
||

374 | compensate for the difference of reference frames |
||

375 | |||

376 | scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8 |
||

377 | |||

378 | 2. the median of the scaled left, top and top-right vectors is used as |
||

379 | motion vector prediction |
||

380 | |||

381 | 3. the used motion vector is the sum of the predictor and |
||

382 | (mvx_diff, mvy_diff)*mv_scale |
||

383 | |||

384 | |||

385 | Intra DC Predicton: |
||

386 | ====================== |
||

387 | the luma and chroma values of the left block are used as predictors |
||

388 | |||

389 | the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff |
||

390 | 2cc45470 | Michael Niedermayer | to reverse this in the decoder apply the following: |

391 | c3922c65 | Michael Niedermayer | block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff; |

392 | block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff; |
||

393 | block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff; |
||

394 | 2cc45470 | Michael Niedermayer | block[*][-1].dc[*]= 128; |

395 | 78954a05 | Michael Niedermayer | |

396 | |||

397 | Motion Compensation: |
||

398 | ==================== |
||

399 | e9314de6 | Michael Niedermayer | |

400 | Halfpel interpolation: |
||

401 | ---------------------- |
||

402 | halfpel interpolation is done by convolution with the halfpel filter stored |
||

403 | in the header: |
||

404 | |||

405 | horizontal halfpel samples are found by |
||

406 | H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1]) |
||

407 | + hcoeff[1]*(F[y][x-1] + F[y][x+2]) |
||

408 | + hcoeff[2]*(F[y][x-2] + F[y][x+3]) |
||

409 | + ... |
||

410 | h1[y][x] = (H1[y][x] + 32)>>6; |
||

411 | |||

412 | vertical halfpel samples are found by |
||

413 | H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x]) |
||

414 | + hcoeff[1]*(F[y-1][x] + F[y+2][x]) |
||

415 | + ... |
||

416 | h2[y][x] = (H2[y][x] + 32)>>6; |
||

417 | |||

418 | vertical+horizontal halfpel samples are found by |
||

419 | H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1]) |
||

420 | + hcoeff[1]*(H2[y][x-1] + H2[y][x+2]) |
||

421 | + ... |
||

422 | H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x]) |
||

423 | + hcoeff[1]*(H1[y+1][x] + H1[y+2][x]) |
||

424 | + ... |
||

425 | h3[y][x] = (H3[y][x] + 2048)>>12; |
||

426 | |||

427 | |||

428 | F H1 F |
||

429 | | | | |
||

430 | | | | |
||

431 | | | | |
||

432 | F H1 F |
||

433 | | | | |
||

434 | | | | |
||

435 | | | | |
||

436 | F-------F-------F-> H1<-F-------F-------F |
||

437 | v v v |
||

438 | H2 H3 H2 |
||

439 | ^ ^ ^ |
||

440 | F-------F-------F-> H1<-F-------F-------F |
||

441 | | | | |
||

442 | | | | |
||

443 | | | | |
||

444 | F H1 F |
||

445 | | | | |
||

446 | | | | |
||

447 | | | | |
||

448 | F H1 F |
||

449 | |||

450 | |||

451 | unavailable fullpel samples (outside the picture for example) shall be equal |
||

452 | to the closest available fullpel sample |
||

453 | |||

454 | |||

455 | Smaller pel interpolation: |
||

456 | -------------------------- |
||

457 | if diag_mc is set then points which lie on a line between 2 vertically, |
||

458 | horiziontally or diagonally adjacent halfpel points shall be interpolated |
||

459 | linearls with rounding to nearest and halfway values rounded up. |
||

460 | points which lie on 2 diagonals at the same time should only use the one |
||

461 | diagonal not containing the fullpel point |
||

462 | |||

463 | |||

464 | |||

465 | F-->O---q---O<--h1->O---q---O<--F |
||

466 | v \ / v \ / v |
||

467 | O O O O O O O |
||

468 | | / | \ | |
||

469 | q q q q q |
||

470 | | / | \ | |
||

471 | O O O O O O O |
||

472 | ^ / \ ^ / \ ^ |
||

473 | h2-->O---q---O<--h3->O---q---O<--h2 |
||

474 | v \ / v \ / v |
||

475 | O O O O O O O |
||

476 | | \ | / | |
||

477 | q q q q q |
||

478 | | \ | / | |
||

479 | O O O O O O O |
||

480 | ^ / \ ^ / \ ^ |
||

481 | F-->O---q---O<--h1->O---q---O<--F |
||

482 | |||

483 | |||

484 | |||

485 | the remaining points shall be bilinearly interpolated from the |
||

486 | a11dc59a | Michael Niedermayer | up to 4 surrounding halfpel and fullpel points, again rounding should be to |

487 | nearest and halfway values rounded up |
||

488 | e9314de6 | Michael Niedermayer | |

489 | ead6ebdd | Mike Melanson | compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma |

490 | e9314de6 | Michael Niedermayer | interpolation at least |

491 | |||

492 | |||

493 | Overlapped block motion compensation: |
||

494 | ------------------------------------- |
||

495 | 78954a05 | Michael Niedermayer | FIXME |

496 | |||

497 | LL band prediction: |
||

498 | =================== |
||

499 | 1e37b7e4 | Michael Niedermayer | Each sample in the LL0 subband is predicted by the median of the left, top and |

500 | left+top-topleft samples, samples outside the subband shall be considered to |
||

501 | be 0. To reverse this prediction in the decoder apply the following. |
||

502 | for(y=0; y<height; y++){ |
||

503 | for(x=0; x<width; x++){ |
||

504 | sample[y][x] += median(sample[y-1][x], |
||

505 | sample[y][x-1], |
||

506 | sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]); |
||

507 | } |
||

508 | } |
||

509 | sample[-1][*]=sample[*][-1]= 0; |
||

510 | width,height here are the width and height of the LL0 subband not of the final |
||

511 | video |
||

512 | |||

513 | 78954a05 | Michael Niedermayer | |

514 | Dequantizaton: |
||

515 | ============== |
||

516 | FIXME |
||

517 | |||

518 | Wavelet Transform: |
||

519 | ================== |
||

520 | fdb99704 | Michael Niedermayer | |

521 | Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer |
||

522 | transform and a integer approximation of the symmetric biorthogonal 9/7 |
||

523 | daubechies wavelet. |
||

524 | |||

525 | 09671ce7 | Michael Niedermayer | 2D IDWT (inverse discrete wavelet transform) |

526 | -------------------------------------------- |
||

527 | The 2D IDWT applies a 2D filter recursively, each time combining the |
||

528 | 4 lowest frequency subbands into a single subband until only 1 subband |
||

529 | remains. |
||

530 | The 2D filter is done by first applying a 1D filter in the vertical direction |
||

531 | and then applying it in the horizontal one. |
||

532 | --------------- --------------- --------------- --------------- |
||

533 | |LL0|HL0| | | | | | | | | | | | |
||

534 | 7397cf3f | Michael Niedermayer | |---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | | |

535 | 09671ce7 | Michael Niedermayer | |LH0|HH0| | | | | | | | | | | | |

536 | |-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->... |
||

537 | | | | | | | | | | | | | |
||

538 | | LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | | |
||

539 | | | | | | | | | | | | | |
||

540 | --------------- --------------- --------------- --------------- |
||

541 | |||

542 | |||

543 | 1D Filter: |
||

544 | ---------- |
||

545 | 1. interleave the samples of the low and high frequency subbands like |
||

546 | s={L0, H0, L1, H1, L2, H2, L3, H3, ... } |
||

547 | note, this can end with a L or a H, the number of elements shall be w |
||

548 | s[-1] shall be considered equivalent to s[1 ] |
||

549 | s[w ] shall be considered equivalent to s[w-2] |
||

550 | |||

551 | 2. perform the lifting steps in order as described below |
||

552 | |||

553 | 5/3 Integer filter: |
||

554 | 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w |
||

555 | 2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w |
||

556 | |||

557 | \ | /|\ | /|\ | /|\ | /|\ |
||

558 | \|/ | \|/ | \|/ | \|/ | |
||

559 | + | + | + | + | -1/4 |
||

560 | /|\ | /|\ | /|\ | /|\ | |
||

561 | / | \|/ | \|/ | \|/ | \|/ |
||

562 | | + | + | + | + +1/2 |
||

563 | |||

564 | |||

565 | ead6ebdd | Mike Melanson | Snow's 9/7 Integer filter: |

566 | 09671ce7 | Michael Niedermayer | 1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w |

567 | 2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w |
||

568 | 3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w |
||

569 | 4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w |
||

570 | |||

571 | \ | /|\ | /|\ | /|\ | /|\ |
||

572 | \|/ | \|/ | \|/ | \|/ | |
||

573 | + | + | + | + | -3/8 |
||

574 | /|\ | /|\ | /|\ | /|\ | |
||

575 | / | \|/ | \|/ | \|/ | \|/ |
||

576 | (| + (| + (| + (| + -1 |
||

577 | \ + /|\ + /|\ + /|\ + /|\ +1/4 |
||

578 | \|/ | \|/ | \|/ | \|/ | |
||

579 | + | + | + | + | +1/16 |
||

580 | /|\ | /|\ | /|\ | /|\ | |
||

581 | / | \|/ | \|/ | \|/ | \|/ |
||

582 | | + | + | + | + +3/2 |
||

583 | fdb99704 | Michael Niedermayer | |

584 | a282102d | Michael Niedermayer | optimization tips: |

585 | following are exactly identical |
||

586 | (3a)>>1 == a + (a>>1) |
||

587 | (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2 |
||

588 | 78954a05 | Michael Niedermayer | |

589 | 6a1aa752 | Michael Niedermayer | 16bit implementation note: |

590 | The IDWT can be implemented with 16bits, but this requires some care to |
||

591 | prevent overflows, the following list, lists the minimum number of bits needed |
||

592 | for some terms |
||

593 | 1. lifting step |
||

594 | A= s[i-1] + s[i+1] 16bit |
||

595 | 3*A + 4 18bit |
||

596 | A + (A>>1) + 2 17bit |
||

597 | |||

598 | 3. lifting step |
||

599 | s[i-1] + s[i+1] 17bit |
||

600 | |||

601 | 4. lifiting step |
||

602 | 3*(s[i-1] + s[i+1]) 17bit |
||

603 | |||

604 | |||

605 | 78954a05 | Michael Niedermayer | TODO: |

606 | ===== |
||

607 | Important: |
||

608 | finetune initial contexts |
||

609 | flip wavelet? |
||

610 | try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients |
||

611 | try the MV length as context for coding the residual coefficients |
||

612 | use extradata for stuff which is in the keyframes now? |
||

613 | the MV median predictor is patented IIRC |
||

614 | 2b6134b3 | Michael Niedermayer | implement per picture halfpel interpolation |

615 | c78fc717 | Michael Niedermayer | try different range coder state transition tables for different contexts |

616 | 78954a05 | Michael Niedermayer | |

617 | Not Important: |
||

618 | c64a8712 | Michael Niedermayer | compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality) |

619 | 78954a05 | Michael Niedermayer | spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later) |

620 | |||

621 | |||

622 | Credits: |
||

623 | ======== |
||

624 | Michael Niedermayer |
||

625 | Loren Merritt |
||

626 | |||

627 | |||

628 | Copyright: |
||

629 | ========== |
||

630 | GPL + GFDL + whatever is needed to make this a RFC |