Initial CLD2 source upload.
git-svn-id: https://cld2.googlecode.com/svn/trunk@3 b252ecd4-b096-bf77-eb8e-91563289f87e
This commit is contained in:
175
internal/offsetmap.h
Normal file
175
internal/offsetmap.h
Normal file
@@ -0,0 +1,175 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//
|
||||
// Author: dsites@google.com (Dick Sites)
|
||||
//
|
||||
|
||||
#ifndef UTIL_UTF8_OFFSETMAP_H_
|
||||
#define UTIL_UTF8_OFFSETMAP_H_
|
||||
|
||||
#include <string> // for string
|
||||
#include "integral_types.h" // for uint32
|
||||
|
||||
// ***************************** OffsetMap **************************
|
||||
//
|
||||
// An OffsetMap object is a container for a mapping from offsets in one text
|
||||
// buffer A' to offsets in another text buffer A. It is most useful when A' is
|
||||
// built from A via substitutions that occasionally do not preserve byte length.
|
||||
//
|
||||
// A series of operators are used to build the correspondence map, then
|
||||
// calls can be made to map an offset in A' to an offset in A, or vice versa.
|
||||
// The map starts with offset 0 in A corresponding to offset 0 in A'.
|
||||
// The mapping is then built sequentially, adding on byte ranges that are
|
||||
// identical in A and A', byte ranges that are inserted in A', and byte ranges
|
||||
// that are deleted from A. All bytes beyond those specified when building the
|
||||
// map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
|
||||
// end of the map.
|
||||
//
|
||||
// The internal data structure records positions at which bytes are added or
|
||||
// deleted. Using the map is O(1) when increasing the A' or A offset
|
||||
// monotonically, and O(n) when accessing random offsets, where n is the
|
||||
// number of differences.
|
||||
//
|
||||
|
||||
namespace CLD2 {
|
||||
|
||||
class OffsetMap {
|
||||
public:
|
||||
// Constructor, destructor
|
||||
OffsetMap();
|
||||
~OffsetMap();
|
||||
|
||||
// Clear the map
|
||||
void Clear();
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes correspond
|
||||
// in A and A'
|
||||
void Copy(int bytes);
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// inserted in A' while not advancing in A at all
|
||||
void Insert(int bytes);
|
||||
|
||||
// Add to mapping from A to A', specifying how many next bytes are
|
||||
// deleted from A while not advancing in A' at all
|
||||
void Delete(int bytes);
|
||||
|
||||
// Print map to file, for debugging
|
||||
void Printmap(const char* filename);
|
||||
|
||||
// [Finish building map,] Re-position to offset 0
|
||||
// This call is optional; MapForward and MapBack finish building the map
|
||||
// if necessary
|
||||
void Reset();
|
||||
|
||||
// Map an offset in A' to the corresponding offset in A
|
||||
int MapBack(int aprimeoffset);
|
||||
|
||||
// Map an offset in A to the corresponding offset in A'
|
||||
int MapForward(int aoffset);
|
||||
|
||||
// h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
|
||||
// from A' to A'' and h is from A to A''.
|
||||
//
|
||||
// Note that g->MoveForward(f->MoveForward(aoffset)) always equals
|
||||
// to h->MoveForward(aoffset), while
|
||||
// f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
|
||||
// to h->MoveBack(aprimeprimeoffset). This happens when deletion in
|
||||
// f and insertion in g are at the same place. For example,
|
||||
//
|
||||
// A 1 2 3 4
|
||||
// ^ | ^ ^
|
||||
// | | / | f
|
||||
// v vv v
|
||||
// A' 1' 2' 3'
|
||||
// ^ ^^ ^
|
||||
// | | \ | g
|
||||
// v | v v
|
||||
// A'' 1'' 2'' 3'' 4''
|
||||
//
|
||||
// results in:
|
||||
//
|
||||
// A 1 2 3 4
|
||||
// ^ ^\ ^ ^
|
||||
// | | \ | | h
|
||||
// v | vv v
|
||||
// A'' 1'' 2'' 3'' 4''
|
||||
//
|
||||
// 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
|
||||
// the latter figure.
|
||||
static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
|
||||
|
||||
// For debugging only; writes to stderr
|
||||
void DumpWindow();
|
||||
|
||||
// For testing only -- force a mapping
|
||||
void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
|
||||
|
||||
private:
|
||||
enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
|
||||
|
||||
void Flush();
|
||||
void FlushAll();
|
||||
void MaybeFlushAll();
|
||||
void Emit(MapOp op, int len);
|
||||
|
||||
void SetLeft();
|
||||
void SetRight();
|
||||
|
||||
// Back up over previous range, 1..5 bytes
|
||||
// Return subscript at the beginning of that. Pins at 0
|
||||
int Backup(int sub);
|
||||
|
||||
// Parse next range, 1..5 bytes
|
||||
// Return subscript just off the end of that
|
||||
int ParseNext(int sub, MapOp* op, int* length);
|
||||
|
||||
// Parse previous range, 1..5 bytes
|
||||
// Return current subscript
|
||||
int ParsePrevious(int sub, MapOp* op, int* length);
|
||||
|
||||
void PrintPosition(const char* str);
|
||||
|
||||
bool MoveRight(); // Returns true if OK
|
||||
bool MoveLeft(); // Returns true if OK
|
||||
void DumpString();
|
||||
|
||||
// Copies insert operations from source to dest. Returns true if no
|
||||
// other operations are found.
|
||||
static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
|
||||
|
||||
// Copies delete operations from source to dest. Returns true if no other
|
||||
// operations are found.
|
||||
static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
|
||||
|
||||
std::string diffs_;
|
||||
MapOp pending_op_;
|
||||
uint32 pending_length_;
|
||||
|
||||
// Offsets in the ranges below correspond to each other, with A' = A + diff
|
||||
int next_diff_sub_;
|
||||
int current_lo_aoffset_;
|
||||
int current_hi_aoffset_;
|
||||
int current_lo_aprimeoffset_;
|
||||
int current_hi_aprimeoffset_;
|
||||
int current_diff_;
|
||||
int max_aoffset_;
|
||||
int max_aprimeoffset_;
|
||||
};
|
||||
|
||||
} // namespace CLD2
|
||||
|
||||
#endif // UTIL_UTF8_OFFSETMAP_H_
|
||||
|
Reference in New Issue
Block a user