...
TAG_REPLACEMENT PSEUDOCODE
struct TagInfo
{
bool fPairTagClosed = true; // false for bpt tag - waiting for matching ept tag. If we'll find matching tag -> we'll set this to true
bool fTagAlreadyUsedInTarget = false; // would be set to true if we would already use this tag as matching for target
// this we generate to save in TM. this would be saved as <{generated_tagType} [x={generated_x}] [i={generated_i}]/>.
// we would skip x attribute for generated_tagType=EPT_ELEMENT and i for generated_tagType=PH_ELEMENT
int generated_i = -1; // for pair tags - generated identifier to find matching tag. the same as original_i if it's not bound to another tag in the segment
int generated_x = -1; // id of tag. should match original_x, if it's not occupied by other tags
TagType generated_tagType = UNKNOWN_ELEMENT; // replaced tagType, could be only PH_ELEMENT, BPT_ELEMENT, EPT_ELEMENT
// this can't be generated, only saved from provided data
int original_i = -1; // original paired tags i
int original_x = -1; // original id of tag
TagType original_tagType = UNKNOWN_ELEMENT; // original tagType, could be any tag
};
}
TagType could be one of the values in enum:
[
BPT_ELEMENT EPT_ELEMENT G_ELEMENT HI_ELEMENT SUB_ELEMENT BX_ELEMENT EX_ELEMENT
//standalone tags
BEGIN_STANDALONE_TAGS PH_ELEMENT X_ELEMENT IT_ELEMENT UT_ELEMENT
]
we use 3 lists of tags
SOURCE_TAGS
TARGET_TAGS
REQUEST_TAGS
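as id we understand one of the following attributes (whichever is present in the original tag): 'x', 'id'. In TagInfo it is stored as original_x / generated_x; in the pseudocode below it is called original_id / generated_id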
as i we understand one of following attributes(which is present in original tag) : 'i', 'rid'
all single tags we understand as ph_tag
all opening pair tags we understand as bpt_tag
all closing pair tags we understand as ept_tag
-1 means that value is not found/not used/not provided etc.
for ept tags in generated_id we would use generated_id from matching bpt tag
if matching bpt tag is not found -> ???
TagType could be set to one of following values
TAG REPLACEMENT USE CASES {
IMPORT{
SOURCE_SEGMENT{
<single tags> -> would be saved as <ph>{ // for ph and all single tags
if(type == "lb"){
replace with newline
}else{
generate next generated_id incrementally
ignore content and attributes(except id) if provided
set generated_tagType to PH_ELEMENT
save original_tagType for matching
if id provided -> save as original_id for matching
save tag to SOURCE_TAGS
}
}
<opening pair tags> -> would be saved as <bpt>{
original type is <bpt>{
generate generated_i incrementally in source segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
save original_i (should that always be provided??)
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set original_type as BPT_ELEMENT
save tag to SOURCE_TAGS
}
original type is <bx>{
generate generated_i incrementally in source segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
save original_i (should that always be provided??)
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set original_type as BX_ELEMENT
save tag to SOURCE_TAGS
}
original type is other opening pair tags(like <g>){
generate generated_i incrementally in source segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
save tag type as original_tagType;
save tag to SOURCE_TAGS
}
}
<closing pair tags> -> would be saved as <ept>{
original type is <ept>{
search for matching bpt_tag in saved tags
//should we look in reverse order?
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == BPT_ELEMENT
AND matchingTag.original_i == our_ept_tag.original_i
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_ept_tag.generated_i to matchingTag.generated_i
set our_ept_tag.generated_id to matchingTag.generated_id
else
generate next our_ept_tag.generated_i incrementally in source segment // in every segment(target, source, request) i starts from 1
generate next our_ept_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in SOURCE_TAGS
}
original type is <ex>{
search for matching bpt_tag in saved tags
//should we look in reverse order?
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs has BPT_ELEMENT here
AND matchingTag.original_tagType == BX_ELEMENT
AND matchingTag.original_i == our_ept_tag.original_i
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_ept_tag.generated_i to matchingTag.generated_i
set our_ept_tag.generated_id to matchingTag.generated_id
else
generate next our_ept_tag.generated_i incrementally in source segment // in every segment(target, source, request) i starts from 1
generate next our_ept_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in SOURCE_TAGS
}
original type is others closing pair tags(like </g>){
search for matching bpt_tag in saved tags:
looking in SOURCE_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == our_tag.original_tagType
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else
generate next our_tag.generated_i incrementally in source segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in SOURCE_TAGS
}
}
}
TARGET_SEGMENT{
<single tags> -> would be saved as <ph>{ // for ph and all single tags
if(type == "lb"){
replace with newline
}else{
ignore content and attributes(except id) if provided
save original_tagType for matching
if id provided -> save as original_id for matching
search for matching ph_tag in saved tags
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fTagAlreadyUsedInTarget == false
AND matchingTag.generated_tagType == PH_ELEMENT //SINGLE TAG
AND matchingTag.original_tagType == our_ph_tag.original_tagType
AND matchingTag.original_id == our_ph_tag.original_id
]
if found
set matchingTag.fTagAlreadyUsedInTarget = true
set our_ph_tag.generated_id = matchingTag.generated_id // use id generated for source segment
else
generate new our_ph_tag.generated_id incrementally(should be unique for SOURCE and TARGET)
save tag in TARGET_TAGS // we should track only opening pair tags in target, so theoretically can skip this step
}
}
<opening tags> -> would be saved as <bpt>{
original type is <bpt>{
set generated_tagType to BPT_ELEMENT
save original_i (should that always be provided??)
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set original_type as BPT_ELEMENT
try to find matching source tag to get generated id:
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fTagAlreadyUsedInTarget == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == BPT_ELEMENT
AND matchingTag.original_id == our_bpt_tag.original_id
]
if found:
set matchingTag.fTagAlreadyUsedInTarget to true
generate our_bpt_tag.generated_i incrementally in target segment
set our_bpt_tag.generated_id to matchingTag.generated_id
else:
generate our_bpt_tag.generated_i incrementally in target segment // i starts from 1 in every segment; only generated_id is unique between all segments
generate our_bpt_tag.generated_id incrementally // unique between all segments
save tag in TARGET_TAGS
}
original type is <bx>{
set generated_tagType to BPT_ELEMENT
save original_i (should that always be provided??)
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set original_type as BX_ELEMENT
try to find matching source tag to get generated id:
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fTagAlreadyUsedInTarget == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == BX_ELEMENT
AND matchingTag.original_id == our_bpt_tag.original_id
]
if found:
set matchingTag.fTagAlreadyUsedInTarget to true
generate our_bpt_tag.generated_i incrementally in target segment
set our_bpt_tag.generated_id to matchingTag.generated_id
else:
generate our_bpt_tag.generated_i incrementally in target segment // i starts from 1 in every segment; only generated_id is unique between all segments
generate our_bpt_tag.generated_id incrementally // unique between all segments
save tag in TARGET_TAGS
}
original type is other opening pair tags(like <g>){
set generated_tagType to BPT_ELEMENT
we never have here original i attribute
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
save original_type
try to find matching source tag to get generated id:
looking in SOURCE_TAGS for matchingTag which have [
matchingTag.fTagAlreadyUsedInTarget == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == our_tag.original_tagType
AND matchingTag.original_id == our_tag.original_id
]
if found:
set matchingTag.fTagAlreadyUsedInTarget to true
generate our_tag.generated_i incrementally in target segment
set our_tag.generated_id to matchingTag.generated_id
else:
generate our_tag.generated_i incrementally in target segment // i starts from 1 in every segment; only generated_id is unique between all segments
generate our_tag.generated_id incrementally // unique between all segments
save tag in TARGET_TAGS
}
}
<closing tags> -> would be saved as <ept>{
original type is <ept>{
try to find matching bpt tag in TARGET_TAGS
looking in TARGET_TAGS for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == BPT_ELEMENT
AND matchingTag.original_i == our_tag.original_i
]
if found:
set matchingTag.fPairTagClosed to true
set our_tag.generated_id to matchingTag.generated_id
set our_tag.generated_i to matchingTag.generated_i
else:
generate our_tag.generated_i incrementally in target segment // i starts from 1 in every segment; only generated_id is unique between all segments
generate our_tag.generated_id incrementally // unique between all segments
save tag in TARGET_TAGS // we should track only opening pair tags in target, so theoretically can skip this step
}
original type is <ex>{
try to find matching bpt tag in TARGET_TAGS
looking in TARGET_TAGS for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always has BPT_ELEMENT here
AND matchingTag.original_tagType == BX_ELEMENT
AND matchingTag.original_i == our_tag.original_i
]
if found:
set matchingTag.fPairTagClosed to true
set our_tag.generated_id to matchingTag.generated_id
set our_tag.generated_i to matchingTag.generated_i
else:
generate our_tag.generated_i incrementally in target segment // i starts from 1 in every segment; only generated_id is unique between all segments
generate our_tag.generated_id incrementally // unique between all segments
save tag in TARGET_TAGS // we should track only opening pair tags in target, so theoretically can skip this step
}
original type is others closing pair tags(like </g>){
search for matching bpt_tag in saved tags:
looking in TARGET_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == our_tag.original_tagType
]
if found:
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else :
generate next our_tag.generated_i incrementally in target segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in TARGET_TAGS // we should track only opening pair tags in target, so theoretically can skip this step
}
}
}
}
}
Tag replacement for fuzzy request pseudocode:
TAG_REPLACEMENT PSEUDOCODE
struct TagInfo
{
bool fPairTagClosed = true; // false for bpt tag - waiting for matching ept tag. If we'll find matching tag -> we'll set this to true
bool fTagAlreadyUsedInTarget = false; // would be set to true if we would already use this tag as matching for target
// this we generate to save in TM. this would be saved as <{generated_tagType} [x={generated_x}] [i={generated_i}]/>.
// we would skip x attribute for generated_tagType=EPT_ELEMENT and i for generated_tagType=PH_ELEMENT
int generated_i = -1; // for pair tags - generated identifier to find matching tag. the same as original_i if it's not bound to another tag in the segment
int generated_x = -1; // id of tag. should match original_x, if it's not occupied by other tags
TagType generated_tagType = UNKNOWN_ELEMENT; // replaced tagType, could be only PH_ELEMENT, BPT_ELEMENT, EPT_ELEMENT
// this can't be generated, only saved from provided data
int original_i = -1; // original paired tags i
int original_x = -1; // original id of tag
TagType original_tagType = UNKNOWN_ELEMENT; // original tagType, could be any tag
};
}
we use 3 lists of tags
SOURCE_TAGS
TARGET_TAGS
REQUEST_TAGS
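as id we understand one of the following attributes (whichever is present in the original tag): 'x', 'id'. In TagInfo it is stored as original_x / generated_x; in the pseudocode below it is called original_id / generated_id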
as i we understand one of following attributes(which is present in original tag) : 'i', 'rid'
all single tags we understand as ph_tag
all opening pair tags we understand as bpt_tag
all closing pair tags we understand as ept_tag
-1 means that value is not found/not used/not provided etc.
for ept tags in generated_id we would use generated_id from matching bpt tag
if matching bpt tag is not found -> ???
TagType could be set to one of following values
TAG REPLACEMENT USE CASES {
REQUEST{
...
basically we convert the request segment to tmx tags (similar to how we generate ph, bpt and ept tags at import), but keep the original data
then we try to match tags from the source segment to the tags generated from the request. In the matching source tags we replace the data with the original data from the request (tagType, id and i attributes)
then we do the same with the target segment/tags
REQUEST_SEGMENT{
are we sending only xliff? so ph, bpt and ept tag shouldn't be handled here?
<single tags> { // for ph and all single tags
// here we can have PH, X, IT, UT tags, right?
generate generated_id incrementally
set generated_tagType to PH_ELEMENT
save original_id if provided (should that always be provided??)
save tag type as our_tag.original_tagType
save tag in REQUEST_TAGS
}
<opening tags> {
//this would never be sent from translate5, right?
original type is <bpt>{
save tag in REQUEST_TAGS
}
original type is <bx>{
generate generated_i incrementally in request segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
save original_i (should that always be provided??)
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set fTagAlreadyUsedInTarget to false;
set original_type as BX_ELEMENT
save tag to REQUEST_TAGS
}
original type is <g>{
generate generated_i incrementally in request segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
we don't have original_i provided here, only original_id, right?
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set fTagAlreadyUsedInTarget to false;
set original_type as G_ELEMENT
save tag in REQUEST_TAGS
}
original type is <hi>{
generate generated_i incrementally in request segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
we don't have original_i provided here, only original_id, right?
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set fTagAlreadyUsedInTarget to false;
set original_type as HI_ELEMENT
save tag in REQUEST_TAGS
}
original type is <sub>{
generate generated_i incrementally in request segment
generate generated_id incrementally
set generated_tagType to BPT_ELEMENT
we don't have original_i provided here, only original_id, right?
save original_id if provided (should that always be provided??)
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
set fTagAlreadyUsedInTarget to false;
set original_type as SUB_ELEMENT
save tag in REQUEST_TAGS
}
}
<closing tags> {
//this would never be sent from translate5, right?
original type is <ept>{
save tag in REQUEST_TAGS
}
original type is <ex>{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == BX_ELEMENT // our_tag.original_tagType
AND matchingTag.original_i == our_tag.original_i
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else
generate next our_tag.generated_i incrementally in request segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in REQUEST_TAGS
}
original type is </g>{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == G_ELEMENT // our_tag.original_tagType
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else
generate next our_tag.generated_i incrementally in request segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in REQUEST_TAGS
}
original type is </hi>{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == HI_ELEMENT // our_tag.original_tagType
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else
generate next our_tag.generated_i incrementally in request segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in REQUEST_TAGS
}
original type is </sub>{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //OPENING_PAIR_TAG
AND matchingTag.original_tagType == SUB_ELEMENT // our_tag.original_tagType
]
if found
set matchingTag.fPairTagClosed to true to eliminate matching one opening tag for different closing tags
set our_tag.generated_i to matchingTag.generated_i
set our_tag.generated_id to matchingTag.generated_id
else
generate next our_tag.generated_i incrementally in request segment // in every segment(target, source, request) i starts from 1
generate next our_tag.generated_id incrementally // should be unique across target, source and request segments
save tag in REQUEST_TAGS
}
}
}
!!!CONSIDER THAT WE SHOULD HAVE IN SOURCE SEGMENT ONLY 3 TYPES OF TAGS - PH_ELEMENT, BPT_ELEMENT and EPT_ELEMENT, because all of them were regenerated with their attributes at import stage
At this point we read the source and target segments "as is", without any tag replacement in lists, so original_id is the id that was generated_id at import stage.
SOURCE_SEGMENT{
<ph x="1" />{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == PH_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" />
else
maybe just return <x/> tag?
save tag in SOURCE_TAGS
}
<bpt i="1" x="2"/> {
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == BPT_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
set our_tag.generated_i = matchingTag.original_i
if matchingTag.original_tagType == BX_ELEMENT // do BX_ELEMENT always have id and rid attributes provided?
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
else:
[rid="{our_tag.generated_i}"] - means optional, so for example if it's bigger than 0, then we should add this attribute
use that data to generate tag like <our_tag.generated_tagType [id="{our_tag.generated_id}"] [rid="{our_tag.generated_i}"] >
else
maybe just return <bx/> tag?
save tag in SOURCE_TAGS
}
<ept i="1" /> {
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == EPT_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id // id should hold information about paired BPT_ELEMENT, or it's absence
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
set our_tag.generated_i = matchingTag.original_i
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
if matchingTag.original_tagType == EX_ELEMENT // do EX_ELEMENT always have id and rid attributes provided?
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
else:
[rid="{our_tag.generated_i}"] - means optional, so for example if it's bigger than 0, then we should add this attribute
use that data to generate tag like </our_tag.generated_tagType>
else
maybe just return <ex/> tag? or add some specific attributes?
save tag in SOURCE_TAGS
}
}
}
Previous documentation:
|
...