commit d4a380c2346e616fcbde234e5a2572802eeb66fb Author: Araozu Date: Tue Sep 10 10:48:19 2024 -0500 osm parsing diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1f8b2e8 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module osm_parser + +go 1.23.0 diff --git a/main.go b/main.go new file mode 100644 index 0000000..39c0bed --- /dev/null +++ b/main.go @@ -0,0 +1,177 @@ +package main + +import ( + "encoding/xml" + "errors" + "fmt" + "log" + "os" + "regexp" +) + +type OsmBounds struct { + MinLat float64 `xml:"minlat,attr"` + MinLon float64 `xml:"minlon,attr"` + MaxLat float64 `xml:"maxlat,attr"` + MaxLon float64 `xml:"maxlon,attr"` +} + +type OsmNode struct { + Id int64 `xml:"id,attr"` + Lat float64 `xml:"lat,attr"` + Lon float64 `xml:"lon,attr"` + Version int64 `xml:"version,attr"` + Timestamp string `xml:"timestamp,attr"` + Changeset int64 `xml:"changeset,attr"` + Uid int64 `xml:"uid,attr"` + User string `xml:"user,attr"` +} + +type OsmMember struct { + Type string `xml:"type,attr"` + Ref int64 `xml:"ref,attr"` + Role string `xml:"role,attr"` +} + +type OsmTag struct { + K string `xml:"k,attr"` + V string `xml:"v,attr"` +} + +type OsmRelation struct { + Id int64 `xml:"id,attr"` + Version int32 `xml:"version,attr"` + Timestamp string `xml:"timestamp,attr"` + Changeset int64 `xml:"changeset,attr"` + Uid int64 `xml:"uid,attr"` + User string `xml:"user,attr"` + Members []OsmMember `xml:"member"` + Tags []OsmTag `xml:"tag"` +} + +type OsmDocument struct { + Note string `xml:"note"` + Bounds OsmBounds `xml:"bounds"` + Nodes []OsmNode `xml:"node"` + Relations []OsmRelation `xml:"relation"` +} + +type CombiData struct { + /// Name of the operator of the route. + /// E.g.: "C7 - AqpMasivo" + Company string + /// Name of the route + /// E.g.: "Combi B: Villa Santa Rosa -> Terminal Terrestre" + Name string + /// Contains a single, identifiable name for the route. + /// This is the Name field, but without any extra info + /// E.g.: "B" + ParsedName string +} + +func main() { + log.Println("Begin processing") + + xmlData, err := os.ReadFile("aqp_map.xml") + if err != nil { + panic(err) + } + + var osmDocument OsmDocument + err = xml.Unmarshal(xmlData, &osmDocument) + if err != nil { + panic(err) + } + log.Println("XML unmarshal finished") + + // Get the relation with id 17642638, + // that relation hosts the SIT data + var sitId int64 = 17642638 + var sitRelation *OsmRelation + for _, relation := range osmDocument.Relations { + if relation.Id == sitId { + sitRelation = &relation + } + } + if sitRelation == nil { + panic("SIT relation with id 17642638 not found!") + } + + // (naively) get all the submembers + sitMembers := make([]OsmRelation, 0) + for _, member := range sitRelation.Members { + // search the member + // TODO: determine if this is a performance bottleneck, + // and requires optimization (insertion sort & binary search) + for _, relation := range osmDocument.Relations { + if relation.Id == member.Ref { + sitMembers = append(sitMembers, relation) + } + } + } + + // transform sitMembers into CombiData + combis := make([]CombiData, 0) + for _, member := range sitMembers { + var operatorTag *OsmTag + var nameTag *OsmTag + + for _, tag := range member.Tags { + if tag.K == "operator" { + operatorTag = &tag + continue + } + if tag.K == "name" { + nameTag = &tag + continue + } + } + + if operatorTag == nil { + log.Fatalf("Found a SIT member without an operator tag, with id %d\n", member.Id) + } + if nameTag == nil { + log.Fatalf("Found a SIT member without a name tag, with id %d\n", member.Id) + } + + parsedName, err := parseRoute(nameTag.V) + if err != nil { + log.Printf("SIT member id: %s\n", member.Id) + panic(err) + } + + combis = append(combis, CombiData{ + Company: operatorTag.V, + Name: nameTag.V, + ParsedName: parsedName, + }) + } + + for _, combi := range combis { + log.Printf("%s: `%s`\n", combi.Company, combi.ParsedName) + } +} + +func parseRoute(routeName string) (string, error) { + // There are 3 types of route name formats: + // Combi xx : ____ + // Curster xx : ____ + // xx : ___ + + // attempt first format + match := regexp.MustCompile("Combi (.+) :.+").FindStringSubmatch(routeName) + if match != nil { + return match[1], nil + } + match = regexp.MustCompile("Custer (.+) :.+").FindStringSubmatch(routeName) + if match != nil { + return match[1], nil + } + match = regexp.MustCompile("(.+) :.+").FindStringSubmatch(routeName) + if match != nil { + return match[1], nil + } + + // Return an error + return "", errors.New(fmt.Sprintf("Route %s did not match any pattern", routeName)) +}