...
!!!CONSIDER THAT WE SHOULD HAVE IN SOURCE SEGMENT ONLY 3 TYPES OF TAGS - PH_ELEMENT, BPT_ELEMENT and EPT_ELEMENT, because all of them were regenerated with their attributes at import stage
At this point we read the source and target segments "as is", without any tag replacement in lists. so original_id would be id, that was generated_id at import stage.
SOURCE_SEGMENT{
<ph x="1" />{
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == PH_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" />
else
maybe just return <x/> tag?
save tag in SOURCE_TAGS
}
<bpt i="1" x="2"/> {
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == BPT_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
set our_tag.generated_i = matchingTag.original_i
if matchingTag.original_tagType == BX_ELEMENT // do BX_ELEMENT always have id and rid attributes provided?
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
else:
[rid="{our_tag.generated_i}"] - square brackets mean the attribute is optional, so for example if it's bigger than 0, then we should add this attribute
use that data to generate tag like <our_tag.generated_tagType [id="{our_tag.generated_id}"] [rid="{our_tag.generated_i}"] >
else
maybe just return <bx/> tag?
save tag in SOURCE_TAGS
}
<ept i="1" /> {
search for matching tag in saved tags:
looking in REQUEST_TAGS in REVERSE for matchingTag which have
[ matchingTag.generated_tagType == EPT_ELEMENT //or our_tag.original_tagType
AND matchingTag.generated_id == our_tag.original_id // id should hold information about paired BPT_ELEMENT, or it's absence
]
if found
set our_tag.generated_tagType = matchingTag.original_tagType
set our_tag.generated_id = matchingTag.original_id
set our_tag.generated_i = matchingTag.original_i
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
if matchingTag.original_tagType == EX_ELEMENT // do EX_ELEMENT always have id and rid attributes provided?
use that data to generate tag like <our_tag.generated_tagType id="{our_tag.generated_id}" rid="{our_tag.generated_i}" />
else:
[rid="{our_tag.generated_i}"] - square brackets mean the attribute is optional, so for example if it's bigger than 0, then we should add this attribute
use that data to generate tag like </our_tag.generated_tagType>
else
maybe just return <ex/> tag? or add some specific attributes?
save tag in SOURCE_TAGS
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////NEW PSEUDO CODE//////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////
Tag replacement feature implementation is split into 2 functions:
GenerateReplacingTag - input - tagType, attributeList
output - tagInfo
this function would generate tagInfo data structure that saves original data (tagType, attributes (i\rid and x\id only)) and would generate new data that suits context\segment
PrintTag - input - tagInfo
- output - text representation of tag with attributes depending on context
this function would print tag with attributes (if they exist, i.e. are bigger than 0). If it's a fuzzy call, it would replace tags in source and target segments with matching tags from request.
If matching tag is not found - it would generate new tag in xliff format with id or rid attributes that increase starting from the biggest id and rid values + 1 that were present in requested segment
for request segment this function would print tag with generated data - that is never used in production, but can be used to find out how mechanism normalized input request segment
(we base tag matching on this normalization.)
////////////////////////////////////
struct TagInfo
{
bool fPairTagClosed = true; // false for bpt tag - waiting for matching ept tag. If we'll find matching tag -> we'll set this to true
bool fTagAlreadyUsedInTarget = false; // would be set to true if we would already use this tag as matching for target
// this we generate to save in TM. this would be saved as <{generated_tagType} [x={generated_id}] [i={generated_i}]/>.
// we would skip x attribute for generated_tagType=EPT_ELEMENT and i for generated_tagType=PH_ELEMENT
int generated_i = -1; // for pair tags - generated identifier to find matching tag. the same as in original_i if it's not binded to other tag in segment
int generated_id = -1; // id of tag. should match original_id, if it's not occupied by other tags
TagType generated_tagType = UNKNOWN_ELEMENT; // replaced tagType, could be only PH_ELEMENT, BPT_ELEMENT, EPT_ELEMENT
// this cant be generated, only saved from provided data
int original_i = -1; // original paired tags i
int original_id = -1; // original id of tag
TagType original_tagType = UNKNOWN_ELEMENT; // original tagType, could be any tag
};
}
TagType could be one of the values in enum:
[
BPT_ELEMENT EPT_ELEMENT G_ELEMENT HI_ELEMENT SUB_ELEMENT BX_ELEMENT EX_ELEMENT
//standalone tags
BEGIN_STANDALONE_TAGS PH_ELEMENT X_ELEMENT IT_ELEMENT UT_ELEMENT
]
We make normalization process to tags which means to replace original xliff\tmx tags\attributes with only 3 tags:
<ph x='1' />
<bpt x='2' i='1' />
<ept i='1' />
which means that we would regenerate id\x in source, target and request segments to make them unified
for source\target segments this replacement is done at import process, for request we do tag replacement, then look for matches between source
and request segments (this happens in PrintTag function), then replace tag from source with original tag that was in request
then we do the same with target segment - we try to find matches of target tags with generated tags in request, and then replace tags in target
with original tags from request
for example, we have this segments in import process
'source':"Select the <hi>net<ph/>work <g>BLK360</g> tag </hi>",
'target':"Select the <hi>net<ph/>work <g>BLK360</g> tag </hi>",
after tag replacement we would have this saved in tm:
'source' :'Select the <bpt x="1" i="1"/>net<ph x="2"/>work <bpt x="3" i="2"/>BLK360<ept i="2"/> tag <ept i="1"/>',
'target' :'Select the <bpt x="1" i="1"/>net<ph x="2"/>work <bpt x="3" i="2"/>BLK360<ept i="2"/> tag <ept i="1"/>',
then if we would have fuzzy request call with segment:
"Select the <g>net<x/>work <g>BLK360</g> tag </g>"
after normalization we would get this:
"Select the <bpt x="1" i="1"/>net<ph x="2"/>work <bpt x="3" i="2"/>BLK360<ept i="2"/> tag <ept i="1"/>"
and then we would try to find matching tags in source and normalized request segments and in case of match-replace tag in src with original from request
and then do the same with target and request
in response we should have:
'source' :'Select the <g>net<x/>work <g>BLK360</g> tag </g>',
'target' :'Select the <g>net<x/>work <g>BLK360</g> tag </g>',
////////////////////////////TagReplacer class//////////////////////
tag normalization statements:
- all single tags we understand as ph_tag that have only x attribute, and looks like this: "<ph x="1"/>"
- all opening pair tags we understand as bpt_tag that always have both i and x attributes, and looks like this: "<bpt x="1" i="1"/>"
- all closing pair tags we understand as ept_tag that always have only i attribute looks like this: "<ept i="1"/>"
- we ignore/skip context within <bpt> and </bpt> and replace this with single <bpt/> type tag, same is true for <ph/> and <ept/>
- as id we understand one of following attributes(which is present in original tag) : 'x', 'id'
- as i we understand one of following attributes(which is present in original tag) : 'i', 'rid'
TagReplacer{
// lists of tagInfo
SOURCE_TAGS
TARGET_TAGS
REQUEST_TAGS
activeSegment //could be one of following: SOURCE_SEGMENT (default value), TARGET_SEGMENT, REQUEST_SEGMENT. Tells us how we should handle tag replacement
iHighestI = 0; // increments with each opening pair tags
iHighestId = 0; // increments with each tag
fFuzzyRequest = false; // flag, that tracks if we are dealing with import or fuzzy request. Tells us how we should handle tag replacement
//to track id and i attributes in request and then generate new values for tags in src and trg that are not matching
iHighestRequestsOriginalI = 0; // during saving original data of tags in request segment we save here biggest original I and Id,
iHighestRequestsOriginalId = 0; // and in case if we couldn't find match in source segment, we would generate xliff tag([bx, ex ,x] or can we left [bpt, ept, ph]?)
// with using and incrementing this values
//functions
// during parsing of tags by xercesc we call this function to
// - collect and save original tagType and attributes(only 'id' and 'i')
// - generate normalized tag data
// - find matches between TARGET and SOURCE segment tags. If we have match - use generated data from SOURCE, if not - generate new unique data
// - save generated tags in lists depends on activeSegment value
// - returns tagInfo data structure
GenerateReplacingTag(tagType, attributes);
//accepts tagInfo data
//depending on fFuzzyRequest and activeSegment values just prints generated normalized tags with generated attributes
// or try to find match for tag from SOURCE\TARGET to REQUEST and print matching tag from REQUEST, or, if no matched, generate new xliff tag with unique attributes
PrintTag(tagInfo);
};
TagInfo{
fPairTagClosed = false; // flag, set to false for bpt/ept tag - waiting for matching ept/bpt tag
fTagAlreadyUsedInTarget = false; // flag, that we use only when we save tags from source segment and then try to match\bind them in target
generated_i = 0; // for pair tags - generated identifier to find matching tag. the same as in original_i if it's not binded to other tag in segment
generated_id = 0; // id of tag. should match original_id, if it's not occupied by other tags
generated_tagType = UNKNOWN_ELEMENT; // replaced tagType, could be PH_ELEMENT, BPT_ELEMENT, EPT_ELEMENT
original_i = 0; // original paired tags i
original_id = 0; // original id of tag
original_tagType; // original tagType
};
//////////////////////////////////////////////////
GenerateReplacingTag{
SOURCE_SEGMENT/REQUEST_SEGMENT
//we handle SOURCE and REQUEST segments here the same way, but we
// use variables activeTagList, that should point to SOURCE_TAGS or REQUEST_TAGS
// to make code more generic
{
<single tags> -> would be saved as <ph>{ // for ph and all single tags
if(type == "lb"){
replace with newline
}else{
save original_tagType
save original_id if provided
if it's REQUEST_SEGMENT AND original_id > iHighestRequestsOriginalId
save original_id as new iHighestRequestsOriginalId
set generated_tagType to PH_ELEMENT
set fPairTagClosed to true
generate generated_id incrementally ( increment iHighestId value, then use it )
save tag to activeTagList // SOURCE_TAGS or REQUEST_TAGS
}
}
<opening pair tags> -> would be saved as <bpt>{
save original_i if provided
save original_id if provided
save original_tagType
set generated_tagType to BPT_ELEMENT
//save biggest id and i attributes in request original data to generate new values
// that wouldn't overlap with other tags in case we wouldn't have matches
if it's REQUEST_SEGMENT AND original_i > iHighestRequestsOriginalI
save original_i as new iHighestRequestsOriginalI
if it's REQUEST_SEGMENT AND original_id > iHighestRequestsOriginalId
save original_id as new iHighestRequestsOriginalId
originalTagTypeToFind = UNKNOWN_ELEMENT // use this variable to identify which tag type we are looking for
if original_tagType is BPT_ELEMENT // check the ORIGINAL type: generated_tagType is always BPT_ELEMENT at this point
set originalTagTypeToFind to EPT_ELEMENT
else if original_tagType is BX_ELEMENT
set originalTagTypeToFind to EX_ELEMENT
else
skip search, because other tags could never have wrong order between opening and closing tags
that would be error in <xml> and parser would throw INVALID_XML error then
if originalTagTypeToFind is not UNKNOWN_ELEMENT
try to find matching ept tag in this segment
looking in REVERSE order in activeTagList for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == EPT_ELEMENT //all CLOSING PAIR TAGs always have EPT_ELEMENT here
AND matchingTag.original_tagType == originalTagTypeToFind
AND matchingTag.original_i == our_bpt_tag.original_i
]
if matchingTag found
set generated_i to matchingTag.generated_i
set generated_id to -matchingTag.generated_id // EPT_TAGS have negative id's/x's that is equal to matching -bpt.x
// if there are no matching bpt, ept have unique, but still negative value.
// negative values and 0 would never be printed in PrintTag
set fPairTagClosed to true
set matchingTag.fPairTagClosed to true
else
generate generated_i incrementally ( increment iHighestI value, then use it )
generate generated_id incrementally ( increment iHighestId value, then use it )
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
save tag to activeTagList
}
<closing pair tags> -> would be saved as <ept>{
save original_i if provided
save original_id if provided
save original_tagType
set generated_tagType to EPT_ELEMENT
//save biggest id and i attributes in request original data to generate new values
// that wouldn't overlap with other tags in case we wouldn't have matches
if it's REQUEST_SEGMENT AND original_i > iHighestRequestsOriginalI
save original_i as new iHighestRequestsOriginalI
if it's REQUEST_SEGMENT AND original_id > iHighestRequestsOriginalId
save original_id as new iHighestRequestsOriginalId
originalTagTypeToFind = UNKNOWN_ELEMENT // use this variable to identify which tag type we are looking for
if original_tagType is EPT_ELEMENT // check the ORIGINAL type: generated_tagType is always EPT_ELEMENT at this point
set originalTagTypeToFind to BPT_ELEMENT
else if original_tagType is EX_ELEMENT
set originalTagTypeToFind to BX_ELEMENT
else
skip search, because other tags could never have wrong order between opening and closing tags
that would be error in <xml> and parser would throw INVALID_XML error then
if originalTagTypeToFind is not UNKNOWN_ELEMENT
try to find matching bpt tag in this segment
looking in REVERSE order in activeTagList for matchingTag which have [
matchingTag.fPairTagClosed == false
AND matchingTag.generated_tagType == BPT_ELEMENT //all OPENING PAIR TAGs always have BPT_ELEMENT here
AND matchingTag.original_tagType == originalTagTypeToFind
AND matchingTag.original_i == our_ept_tag.original_i
]
if matchingTag found
set generated_i to matchingTag.generated_i
set generated_id to -matchingTag.generated_id // EPT_TAGS have negative id's/x's that is equal to matching -bpt.x
// if there are no matching bpt, ept have unique, but still negative value.
// negative values and 0 would never be printed in PrintTag
set fPairTagClosed to true
set matchingTag.fPairTagClosed to true
else
generate generated_i incrementally ( increment iHighestI value, then use it )
generate generated_id incrementally ( increment iHighestId value, then multiply it by *(-1) and use it )
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
save tag to activeTagList
}
}
TARGET_SEGMENT
//here we try to find connections from original Target tags to original Source tags and use data,
// that was generated for matching SOURCE tag. If there are no matching SOURCE tag - generate new unique attributes
{
save original_tagType
save original_id if provided
save original_i if provided
set generated_tagType to - PH_ELEMENT if we have single tag
- BPT_ELEMENT if we have opening pair tag
- EPT_ELEMENT if we have closing pair tag
try to find matching source tag
looking in SOURCE_TAGS for matchingSourceTag which have [
matchingSourceTag.fTagAlreadyUsedInTarget == false
AND matchingSourceTag.original_tagType == our_tag.original_tagType
AND matchingSourceTag.original_id == our_tag.original_id
]
if found:
set generated_i to matchingSourceTag.generated_i
set generated_id to matchingSourceTag.generated_id
// maybe we should add here search for matching ept\bpt tag in TARGET_TAGS, to set valid fPairTagClosed for both
set matchingSourceTag.fTagAlreadyUsedInTarget to true
else
if generated_tagType is PH_ELEMENT
set fPairTagClosed = true
else
use matchingTagOriginalType and matchingTagGeneratedType to find matching tag in TARGET_TAGS
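if original_tagType is BPT_ELEMENT
set matchingTagOriginalType to EPT_ELEMENT
else if original_tagType is BX_ELEMENT // generated_tagType can only be PH/BPT/EPT, so the original type must be checked
set matchingTagOriginalType to EX_ELEMENT
else if original_tagType is EPT_ELEMENT
set matchingTagOriginalType to BPT_ELEMENT
else if original_tagType is EX_ELEMENT // generated_tagType can only be PH/BPT/EPT, so the original type must be checked
set matchingTagOriginalType to BX_ELEMENT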
else
matchingTagOriginalType = original_tagType
if our_tag.generated_tagType = BPT_ELEMENT
set matchingTagGeneratedType to EPT_ELEMENT
else
set matchingTagGeneratedType to BPT_ELEMENT
try to find matching pair tag in this segment
looking in REVERSE order in TARGET_TAGS for matchingPairTag which have [
matchingPairTag.fPairTagClosed == false
AND matchingPairTag.original_tagType == matchingTagOriginalType
AND matchingPairTag.generated_tagType == matchingTagGeneratedType
AND matchingPairTag.original_i == our_tag.original_i
]
if found:
set generated_i to matchingPairTag.generated_i
set generated_id to -matchingPairTag.generated_id // EPT_TAGS have negative id's/x's that is equal to matching -bpt.x
// if there are no matching bpt, ept have unique, but still negative value.
// negative values and 0 would never be printed in PrintTag
set fPairTagClosed to true
set matchingPairTag.fPairTagClosed to true
else:
if we dealing with pair tags -> generate generated_i incrementally ( increment iHighestI value, then use it )
generate generated_id incrementally ( increment iHighestId value, then multiply it by *(-1) and use it )
set fPairTagClosed to false; // it would be set to true if we would use this tag as matching
save tag in TARGET_TAGS
}
}
PrintTag{
variables: idToPrint = 0,
iToPrint = 0,
tagTypeToPrint = tag.generated_tagType
flags: fClosedTag = true; //for slash at the end of tags like <ph/>
fClosingTag = false; //for slash at the beginning of tag like </g>
if it's REQUEST_SEGMENT
// we need this only to track how tag replacement normalized tags in request segment
idToPrint = tag.generated_id
iToPrint = tag.generated_i
else
try to find matching request tag
looking in REQUEST_TAGS for matchingRequestTag which have [
matchingRequestTag.generated_id == our_tag.generated_id
AND matchingRequestTag.generated_tagType == our_tag.generated_tagType
]
if found:
set idToPrint to matchingRequestTag.original_id
set iToPrint to matchingRequestTag.original_i
set tagTypeToPrint to matchingRequestTag.original_tagType
set fClosingTag to tag.generated_tagType == EPT_ELEMENT
AND tagTypeToPrint != EPT_ELEMENT
AND tagTypeToPrint != EX_ELEMENT
else
//generate new id and i
generate idToPrint using iHighestRequestsOriginalId incrementally ( increment that value, then use it )
if generated_tagType is not PH_ELEMENT
//could be improved here if we need
generate iToPrint using iHighestRequestsOriginalI incrementally ( increment that value, then use it )
if fClosingTag is true
return ["</" + tagTypeToPrint + ">"]
else
output = ["<" + tagTypeToPrint]
if idToPrint > 0
if fFuzzyRequest is true:
append to output [' id="' + idToPrint + '"']
else
append to output [' x="' + idToPrint + '"']
if iToPrint > 0
if fFuzzyRequest is true:
append to output [' rid="' + iToPrint + '"']
else
append to output [' i="' + iToPrint + '"']
//tag that has slash at the end looks like this: <tag />
fClosedTag = tagTypeToPrint == BPT_ELEMENT OR
tagTypeToPrint == EPT_ELEMENT OR
tagTypeToPrint == PH_ELEMENT OR
tagTypeToPrint == BX_ELEMENT OR
tagTypeToPrint == EX_ELEMENT OR
tagTypeToPrint == X_ELEMENT ; // other tags could be only not closed(<g>) or closing(</g>)
if fClosedTag is true
append to output "/"
append to output ">"
return output
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Previous documentation:
|
...